// Parameter-backed globals.
//
// Every `util.global private` in this span binds a module-private symbol to a
// `#stream.parameter.named<"model"::"<key>">` attribute: the first string
// ("model") is the same for all entries and the second string is identical to
// the symbol name. The trailing tensor type gives the declared shape; every
// entry in this span uses element type bf16.
//
// NOTE(review): the declaration list continues past this span; only the
// entries shown here were checked for the properties described above.
module {
  // --- time_text_embed.timestep_embedder: linear_1 (3072x256) and linear_2 (3072x3072), weight + bias ---
  util.global private @time_text_embed.timestep_embedder.linear_1.weight = #stream.parameter.named<"model"::"time_text_embed.timestep_embedder.linear_1.weight"> : tensor<3072x256xbf16>
  util.global private @time_text_embed.timestep_embedder.linear_1.bias = #stream.parameter.named<"model"::"time_text_embed.timestep_embedder.linear_1.bias"> : tensor<3072xbf16>
  util.global private @time_text_embed.timestep_embedder.linear_2.weight = #stream.parameter.named<"model"::"time_text_embed.timestep_embedder.linear_2.weight"> : tensor<3072x3072xbf16>
  util.global private @time_text_embed.timestep_embedder.linear_2.bias = #stream.parameter.named<"model"::"time_text_embed.timestep_embedder.linear_2.bias"> : tensor<3072xbf16>

  // --- time_text_embed.guidance_embedder: same shapes as timestep_embedder ---
  util.global private @time_text_embed.guidance_embedder.linear_1.weight = #stream.parameter.named<"model"::"time_text_embed.guidance_embedder.linear_1.weight"> : tensor<3072x256xbf16>
  util.global private @time_text_embed.guidance_embedder.linear_1.bias = #stream.parameter.named<"model"::"time_text_embed.guidance_embedder.linear_1.bias"> : tensor<3072xbf16>
  util.global private @time_text_embed.guidance_embedder.linear_2.weight = #stream.parameter.named<"model"::"time_text_embed.guidance_embedder.linear_2.weight"> : tensor<3072x3072xbf16>
  util.global private @time_text_embed.guidance_embedder.linear_2.bias = #stream.parameter.named<"model"::"time_text_embed.guidance_embedder.linear_2.bias"> : tensor<3072xbf16>

  // --- time_text_embed.text_embedder: linear_1 is 3072x768 (differs from the two embedders above) ---
  util.global private @time_text_embed.text_embedder.linear_1.weight = #stream.parameter.named<"model"::"time_text_embed.text_embedder.linear_1.weight"> : tensor<3072x768xbf16>
  util.global private @time_text_embed.text_embedder.linear_1.bias = #stream.parameter.named<"model"::"time_text_embed.text_embedder.linear_1.bias"> : tensor<3072xbf16>
  util.global private @time_text_embed.text_embedder.linear_2.weight = #stream.parameter.named<"model"::"time_text_embed.text_embedder.linear_2.weight"> : tensor<3072x3072xbf16>
  util.global private @time_text_embed.text_embedder.linear_2.bias = #stream.parameter.named<"model"::"time_text_embed.text_embedder.linear_2.bias"> : tensor<3072xbf16>

  // --- context_embedder / x_embedder: bias only ---
  // NOTE(review): no matching `.weight` globals for these two appear in this
  // span — presumably they are declared elsewhere; confirm.
  util.global private @context_embedder.bias = #stream.parameter.named<"model"::"context_embedder.bias"> : tensor<3072xbf16>
  util.global private @x_embedder.bias = #stream.parameter.named<"model"::"x_embedder.bias"> : tensor<3072xbf16>

  // --- transformer_blocks.0 (first part; the group's remaining globals follow this span) ---
  // norm1 / norm1_context linear layers: 18432x3072 weight and 18432 bias.
  util.global private @transformer_blocks.0.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.0.norm1.linear.weight"> : tensor<18432x3072xbf16>
  util.global private @transformer_blocks.0.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.0.norm1.linear.bias"> : tensor<18432xbf16>
  util.global private @transformer_blocks.0.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.0.norm1_context.linear.weight"> : tensor<18432x3072xbf16>
  util.global private @transformer_blocks.0.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.0.norm1_context.linear.bias"> : tensor<18432xbf16>
  // attn.norm_q / attn.norm_k weights: 128 elements each.
  util.global private @transformer_blocks.0.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.0.attn.norm_q.weight"> : tensor<128xbf16>
  util.global private @transformer_blocks.0.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.0.attn.norm_k.weight"> : tensor<128xbf16>
  // Attention projection biases (3072 each).
  // NOTE(review): only `.bias` globals are declared for these projections in
  // this span — presumably the weights are declared elsewhere; confirm.
  util.global private @transformer_blocks.0.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.0.attn.to_q.bias"> : tensor<3072xbf16>
  util.global private @transformer_blocks.0.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.0.attn.to_k.bias"> : tensor<3072xbf16>
  util.global private @transformer_blocks.0.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.0.attn.to_v.bias"> : tensor<3072xbf16>
  util.global private @transformer_blocks.0.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.0.attn.add_k_proj.bias"> : tensor<3072xbf16>
util.global private @transformer_blocks.0.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.0.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.0.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.0.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.0.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.0.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.0.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.0.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.0.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.0.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.0.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.0.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.0.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.0.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.0.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.0.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.0.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.0.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.0.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.0.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.1.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.1.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.1.norm1.linear.bias"> : tensor<18432xbf16> util.global private 
@transformer_blocks.1.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.1.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.1.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.1.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.1.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.1.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.1.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.1.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.1.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.1.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.1.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.1.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.1.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.1.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.1.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.1.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.1.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.attn.norm_added_q.weight = 
#stream.parameter.named<"model"::"transformer_blocks.1.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.1.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.1.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.1.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.1.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.1.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.1.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.1.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.1.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.1.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.1.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.2.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.2.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.2.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.2.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.2.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.2.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.2.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.2.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.2.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.2.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.2.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.2.attn.to_q.bias = 
#stream.parameter.named<"model"::"transformer_blocks.2.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.2.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.2.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.2.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.2.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.2.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.2.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.2.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.2.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.2.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.2.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.2.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.2.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.2.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.2.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.2.ff_context.net.0.proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.2.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.2.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.2.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.3.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.3.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.3.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.3.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.3.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.3.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.3.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.3.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.3.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.3.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.3.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.3.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.3.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.3.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.3.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.3.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.attn.add_v_proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.3.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.3.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.3.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.3.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.3.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.3.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.3.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.3.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.3.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.3.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.3.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.3.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.3.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.3.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.3.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.4.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.4.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.4.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.4.norm1_context.linear.weight = 
#stream.parameter.named<"model"::"transformer_blocks.4.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.4.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.4.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.4.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.4.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.4.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.4.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.4.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.4.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.4.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.4.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.4.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.4.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.4.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.4.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.4.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.attn.norm_added_q.weight = 
#stream.parameter.named<"model"::"transformer_blocks.4.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.4.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.4.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.4.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.4.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.4.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.4.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.4.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.4.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.4.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.4.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.5.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.5.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.5.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.5.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.5.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.5.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.5.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.5.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.5.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.5.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.5.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.5.attn.to_q.bias = 
#stream.parameter.named<"model"::"transformer_blocks.5.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.5.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.5.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.5.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.5.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.5.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.5.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.5.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.5.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.5.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.5.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.5.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.5.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.5.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.5.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.5.ff_context.net.0.proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.5.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.5.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.5.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.6.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.6.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.6.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.6.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.6.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.6.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.6.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.6.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.6.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.6.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.6.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.6.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.6.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.6.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.6.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.6.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.attn.add_v_proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.6.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.6.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.6.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.6.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.6.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.6.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.6.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.6.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.6.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.6.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.6.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.6.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.6.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.6.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.6.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.7.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.7.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.7.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.7.norm1_context.linear.weight = 
#stream.parameter.named<"model"::"transformer_blocks.7.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.7.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.7.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.7.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.7.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.7.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.7.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.7.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.7.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.7.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.7.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.7.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.7.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.7.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.7.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.7.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.attn.norm_added_q.weight = 
#stream.parameter.named<"model"::"transformer_blocks.7.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.7.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.7.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.7.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.7.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.7.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.7.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.7.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.7.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.7.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.7.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.8.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.8.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.8.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.8.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.8.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.8.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.8.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.8.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.8.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.8.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.8.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.8.attn.to_q.bias = 
#stream.parameter.named<"model"::"transformer_blocks.8.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.8.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.8.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.8.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.8.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.8.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.8.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.8.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.8.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.8.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.8.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.8.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.8.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.8.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.8.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.8.ff_context.net.0.proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.8.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.8.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.8.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.9.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.9.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.9.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.9.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.9.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.9.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.9.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.9.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.9.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.9.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.9.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.9.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.9.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.9.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.9.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.9.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.attn.add_v_proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.9.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.9.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.9.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.9.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.9.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.9.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.9.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.9.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.9.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.9.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.9.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.9.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.9.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.9.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.9.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.10.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.10.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.10.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.10.norm1_context.linear.weight = 
#stream.parameter.named<"model"::"transformer_blocks.10.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.10.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.10.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.10.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.10.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.10.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.10.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.10.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.10.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.10.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.10.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.10.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.10.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.10.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.10.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.10.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.attn.norm_added_q.weight = 
#stream.parameter.named<"model"::"transformer_blocks.10.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.10.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.10.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.10.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.10.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.10.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.10.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.10.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.10.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.10.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.10.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.11.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.11.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.11.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.11.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.11.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.11.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.11.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.11.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.11.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.11.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.11.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.11.attn.to_q.bias 
= #stream.parameter.named<"model"::"transformer_blocks.11.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.11.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.11.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.11.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.11.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.11.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.11.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.11.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.11.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.11.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.11.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.11.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.11.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.11.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.11.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.11.ff_context.net.0.proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.11.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.11.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.11.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.12.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.12.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.12.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.12.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.12.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.12.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.12.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.12.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.12.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.12.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.12.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.12.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.12.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.12.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.12.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.12.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.attn.add_v_proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.12.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.12.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.12.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.12.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.12.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.12.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.12.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.12.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.12.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.12.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.12.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.12.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.12.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.12.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.12.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.13.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.13.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.13.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.13.norm1_context.linear.weight = 
#stream.parameter.named<"model"::"transformer_blocks.13.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.13.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.13.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.13.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.13.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.13.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.13.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.13.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.13.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.13.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.13.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.13.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.13.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.13.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.13.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.13.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.attn.norm_added_q.weight = 
#stream.parameter.named<"model"::"transformer_blocks.13.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.13.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.13.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.13.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.13.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.13.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.13.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.13.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.13.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.13.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.13.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.14.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.14.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.14.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.14.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.14.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.14.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.14.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.14.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.14.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.14.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.14.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.14.attn.to_q.bias 
= #stream.parameter.named<"model"::"transformer_blocks.14.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.14.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.14.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.14.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.14.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.14.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.14.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.14.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.14.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.14.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.14.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.14.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.14.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.14.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.14.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.14.ff_context.net.0.proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.14.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.14.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.14.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.15.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.15.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.15.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.15.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.15.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.15.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.15.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.15.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.15.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.15.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.15.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.15.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.15.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.15.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.15.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.15.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.attn.add_v_proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.15.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.15.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.15.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.15.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.15.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.15.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.15.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.15.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.15.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.15.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.15.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.15.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.15.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.15.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.15.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.16.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.16.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.16.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.16.norm1_context.linear.weight = 
#stream.parameter.named<"model"::"transformer_blocks.16.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.16.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.16.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.16.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.16.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.16.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.16.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.16.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.16.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.16.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.16.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.16.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.16.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.16.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.16.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.16.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.attn.norm_added_q.weight = 
#stream.parameter.named<"model"::"transformer_blocks.16.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.16.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.16.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.16.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.16.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.16.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.16.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.16.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.16.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.16.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.16.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.17.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.17.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.17.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.17.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.17.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.17.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.17.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.17.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.17.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.17.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.17.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.17.attn.to_q.bias 
= #stream.parameter.named<"model"::"transformer_blocks.17.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.17.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.17.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.17.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.attn.add_v_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.17.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.17.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.17.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.17.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.17.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.17.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.17.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.17.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.17.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.17.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.17.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.17.ff_context.net.0.proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.17.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.17.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.17.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.norm1.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.18.norm1.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.18.norm1.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.18.norm1.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.18.norm1_context.linear.weight = #stream.parameter.named<"model"::"transformer_blocks.18.norm1_context.linear.weight"> : tensor<18432x3072xbf16> util.global private @transformer_blocks.18.norm1_context.linear.bias = #stream.parameter.named<"model"::"transformer_blocks.18.norm1_context.linear.bias"> : tensor<18432xbf16> util.global private @transformer_blocks.18.attn.norm_q.weight = #stream.parameter.named<"model"::"transformer_blocks.18.attn.norm_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.18.attn.norm_k.weight = #stream.parameter.named<"model"::"transformer_blocks.18.attn.norm_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.18.attn.to_q.bias = #stream.parameter.named<"model"::"transformer_blocks.18.attn.to_q.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.attn.to_k.bias = #stream.parameter.named<"model"::"transformer_blocks.18.attn.to_k.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.attn.to_v.bias = #stream.parameter.named<"model"::"transformer_blocks.18.attn.to_v.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.attn.add_k_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.18.attn.add_k_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.attn.add_v_proj.bias = 
#stream.parameter.named<"model"::"transformer_blocks.18.attn.add_v_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.attn.add_q_proj.bias = #stream.parameter.named<"model"::"transformer_blocks.18.attn.add_q_proj.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.attn.to_out.0.bias = #stream.parameter.named<"model"::"transformer_blocks.18.attn.to_out.0.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.attn.to_add_out.bias = #stream.parameter.named<"model"::"transformer_blocks.18.attn.to_add_out.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.attn.norm_added_q.weight = #stream.parameter.named<"model"::"transformer_blocks.18.attn.norm_added_q.weight"> : tensor<128xbf16> util.global private @transformer_blocks.18.attn.norm_added_k.weight = #stream.parameter.named<"model"::"transformer_blocks.18.attn.norm_added_k.weight"> : tensor<128xbf16> util.global private @transformer_blocks.18.ff.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.18.ff.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.18.ff.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.18.ff.net.2.bias"> : tensor<3072xbf16> util.global private @transformer_blocks.18.ff_context.net.0.proj.bias = #stream.parameter.named<"model"::"transformer_blocks.18.ff_context.net.0.proj.bias"> : tensor<12288xbf16> util.global private @transformer_blocks.18.ff_context.net.2.bias = #stream.parameter.named<"model"::"transformer_blocks.18.ff_context.net.2.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.0.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.0.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.0.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.0.norm.linear.bias"> : tensor<9216xbf16> util.global private 
@single_transformer_blocks.0.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.0.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.0.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.0.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.0.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.0.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.0.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.0.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.0.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.0.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.0.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.0.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.0.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.0.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.1.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.1.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.1.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.1.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.1.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.1.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.1.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.1.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.1.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.1.attn.norm_q.weight"> : tensor<128xbf16> 
util.global private @single_transformer_blocks.1.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.1.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.1.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.1.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.1.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.1.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.1.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.1.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.2.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.2.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.2.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.2.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.2.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.2.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.2.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.2.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.2.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.2.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.2.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.2.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.2.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.2.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.2.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.2.attn.to_k.bias"> : 
tensor<3072xbf16> util.global private @single_transformer_blocks.2.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.2.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.3.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.3.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.3.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.3.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.3.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.3.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.3.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.3.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.3.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.3.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.3.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.3.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.3.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.3.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.3.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.3.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.3.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.3.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.4.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.4.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.4.norm.linear.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.4.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.4.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.4.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.4.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.4.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.4.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.4.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.4.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.4.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.4.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.4.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.4.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.4.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.4.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.4.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.5.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.5.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.5.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.5.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.5.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.5.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.5.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.5.proj_out.bias"> : tensor<3072xbf16> util.global private 
@single_transformer_blocks.5.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.5.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.5.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.5.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.5.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.5.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.5.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.5.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.5.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.5.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.6.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.6.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.6.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.6.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.6.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.6.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.6.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.6.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.6.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.6.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.6.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.6.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.6.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.6.attn.to_q.bias"> : 
tensor<3072xbf16> util.global private @single_transformer_blocks.6.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.6.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.6.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.6.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.7.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.7.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.7.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.7.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.7.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.7.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.7.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.7.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.7.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.7.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.7.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.7.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.7.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.7.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.7.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.7.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.7.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.7.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.8.norm.linear.weight = 
#stream.parameter.named<"model"::"single_transformer_blocks.8.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.8.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.8.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.8.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.8.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.8.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.8.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.8.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.8.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.8.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.8.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.8.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.8.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.8.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.8.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.8.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.8.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.9.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.9.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.9.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.9.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.9.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.9.proj_mlp.bias"> : tensor<12288xbf16> util.global private 
@single_transformer_blocks.9.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.9.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.9.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.9.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.9.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.9.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.9.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.9.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.9.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.9.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.9.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.9.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.10.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.10.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.10.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.10.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.10.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.10.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.10.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.10.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.10.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.10.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.10.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.10.attn.norm_k.weight"> : 
tensor<128xbf16> util.global private @single_transformer_blocks.10.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.10.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.10.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.10.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.10.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.10.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.11.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.11.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.11.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.11.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.11.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.11.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.11.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.11.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.11.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.11.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.11.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.11.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.11.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.11.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.11.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.11.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.11.attn.to_v.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.11.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.12.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.12.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.12.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.12.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.12.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.12.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.12.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.12.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.12.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.12.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.12.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.12.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.12.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.12.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.12.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.12.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.12.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.12.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.13.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.13.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.13.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.13.norm.linear.bias"> : tensor<9216xbf16> util.global 
private @single_transformer_blocks.13.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.13.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.13.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.13.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.13.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.13.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.13.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.13.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.13.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.13.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.13.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.13.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.13.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.13.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.14.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.14.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.14.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.14.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.14.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.14.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.14.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.14.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.14.attn.norm_q.weight = 
#stream.parameter.named<"model"::"single_transformer_blocks.14.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.14.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.14.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.14.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.14.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.14.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.14.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.14.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.14.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.15.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.15.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.15.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.15.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.15.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.15.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.15.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.15.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.15.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.15.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.15.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.15.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.15.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.15.attn.to_q.bias"> : tensor<3072xbf16> util.global private 
@single_transformer_blocks.15.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.15.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.15.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.15.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.16.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.16.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.16.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.16.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.16.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.16.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.16.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.16.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.16.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.16.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.16.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.16.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.16.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.16.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.16.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.16.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.16.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.16.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.17.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.17.norm.linear.weight"> 
: tensor<9216x3072xbf16> util.global private @single_transformer_blocks.17.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.17.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.17.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.17.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.17.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.17.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.17.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.17.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.17.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.17.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.17.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.17.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.17.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.17.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.17.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.17.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.18.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.18.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.18.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.18.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.18.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.18.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.18.proj_out.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.18.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.18.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.18.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.18.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.18.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.18.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.18.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.18.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.18.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.18.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.18.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.19.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.19.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.19.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.19.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.19.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.19.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.19.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.19.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.19.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.19.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.19.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.19.attn.norm_k.weight"> : tensor<128xbf16> util.global private 
@single_transformer_blocks.19.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.19.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.19.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.19.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.19.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.19.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.20.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.20.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.20.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.20.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.20.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.20.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.20.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.20.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.20.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.20.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.20.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.20.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.20.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.20.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.20.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.20.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.20.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.20.attn.to_v.bias"> : 
tensor<3072xbf16> util.global private @single_transformer_blocks.21.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.21.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.21.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.21.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.21.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.21.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.21.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.21.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.21.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.21.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.21.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.21.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.21.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.21.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.21.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.21.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.21.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.21.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.22.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.22.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.22.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.22.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.22.proj_mlp.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.22.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.22.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.22.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.22.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.22.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.22.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.22.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.22.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.22.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.22.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.22.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.22.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.22.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.23.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.23.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.23.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.23.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.23.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.23.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.23.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.23.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.23.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.23.attn.norm_q.weight"> : tensor<128xbf16> util.global private 
@single_transformer_blocks.23.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.23.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.23.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.23.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.23.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.23.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.23.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.23.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.24.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.24.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.24.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.24.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.24.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.24.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.24.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.24.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.24.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.24.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.24.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.24.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.24.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.24.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.24.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.24.attn.to_k.bias"> 
: tensor<3072xbf16> util.global private @single_transformer_blocks.24.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.24.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.25.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.25.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.25.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.25.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.25.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.25.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.25.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.25.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.25.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.25.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.25.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.25.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.25.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.25.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.25.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.25.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.25.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.25.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.26.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.26.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.26.norm.linear.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.26.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.26.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.26.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.26.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.26.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.26.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.26.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.26.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.26.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.26.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.26.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.26.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.26.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.26.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.26.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.27.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.27.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.27.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.27.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.27.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.27.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.27.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.27.proj_out.bias"> : tensor<3072xbf16> util.global private 
@single_transformer_blocks.27.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.27.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.27.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.27.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.27.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.27.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.27.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.27.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.27.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.27.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.28.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.28.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.28.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.28.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.28.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.28.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.28.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.28.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.28.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.28.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.28.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.28.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.28.attn.to_q.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.28.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.28.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.28.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.28.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.28.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.29.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.29.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.29.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.29.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.29.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.29.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.29.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.29.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.29.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.29.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.29.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.29.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.29.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.29.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.29.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.29.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.29.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.29.attn.to_v.bias"> : tensor<3072xbf16> util.global private 
@single_transformer_blocks.30.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.30.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.30.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.30.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.30.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.30.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.30.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.30.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.30.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.30.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.30.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.30.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.30.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.30.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.30.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.30.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.30.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.30.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.31.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.31.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.31.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.31.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.31.proj_mlp.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.31.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.31.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.31.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.31.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.31.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.31.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.31.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.31.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.31.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.31.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.31.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.31.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.31.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.32.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.32.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.32.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.32.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.32.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.32.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.32.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.32.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.32.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.32.attn.norm_q.weight"> : tensor<128xbf16> util.global private 
@single_transformer_blocks.32.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.32.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.32.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.32.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.32.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.32.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.32.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.32.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.33.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.33.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.33.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.33.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.33.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.33.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.33.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.33.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.33.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.33.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.33.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.33.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.33.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.33.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.33.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.33.attn.to_k.bias"> 
: tensor<3072xbf16> util.global private @single_transformer_blocks.33.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.33.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.34.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.34.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.34.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.34.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.34.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.34.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.34.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.34.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.34.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.34.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.34.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.34.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.34.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.34.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.34.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.34.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.34.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.34.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.35.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.35.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.35.norm.linear.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.35.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.35.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.35.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.35.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.35.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.35.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.35.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.35.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.35.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.35.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.35.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.35.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.35.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.35.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.35.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.36.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.36.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.36.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.36.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.36.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.36.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.36.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.36.proj_out.bias"> : tensor<3072xbf16> util.global private 
@single_transformer_blocks.36.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.36.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.36.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.36.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.36.attn.to_q.bias = #stream.parameter.named<"model"::"single_transformer_blocks.36.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.36.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.36.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.36.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.36.attn.to_v.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.37.norm.linear.weight = #stream.parameter.named<"model"::"single_transformer_blocks.37.norm.linear.weight"> : tensor<9216x3072xbf16> util.global private @single_transformer_blocks.37.norm.linear.bias = #stream.parameter.named<"model"::"single_transformer_blocks.37.norm.linear.bias"> : tensor<9216xbf16> util.global private @single_transformer_blocks.37.proj_mlp.bias = #stream.parameter.named<"model"::"single_transformer_blocks.37.proj_mlp.bias"> : tensor<12288xbf16> util.global private @single_transformer_blocks.37.proj_out.bias = #stream.parameter.named<"model"::"single_transformer_blocks.37.proj_out.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.37.attn.norm_q.weight = #stream.parameter.named<"model"::"single_transformer_blocks.37.attn.norm_q.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.37.attn.norm_k.weight = #stream.parameter.named<"model"::"single_transformer_blocks.37.attn.norm_k.weight"> : tensor<128xbf16> util.global private @single_transformer_blocks.37.attn.to_q.bias = 
#stream.parameter.named<"model"::"single_transformer_blocks.37.attn.to_q.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.37.attn.to_k.bias = #stream.parameter.named<"model"::"single_transformer_blocks.37.attn.to_k.bias"> : tensor<3072xbf16> util.global private @single_transformer_blocks.37.attn.to_v.bias = #stream.parameter.named<"model"::"single_transformer_blocks.37.attn.to_v.bias"> : tensor<3072xbf16> util.global private @norm_out.linear.weight = #stream.parameter.named<"model"::"norm_out.linear.weight"> : tensor<6144x3072xbf16> util.global private @norm_out.linear.bias = #stream.parameter.named<"model"::"norm_out.linear.bias"> : tensor<6144xbf16> util.global private @onnx__MatMul_19741 = #stream.parameter.named<"model"::"onnx__MatMul_19741"> : tensor<64x3072xbf16> util.global private @onnx__MatMul_19758 = #stream.parameter.named<"model"::"onnx__MatMul_19758"> : tensor<4096x3072xbf16> util.global private @onnx__MatMul_19762 = #stream.parameter.named<"model"::"onnx__MatMul_19762"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19763 = #stream.parameter.named<"model"::"onnx__MatMul_19763"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19764 = #stream.parameter.named<"model"::"onnx__MatMul_19764"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19795 = #stream.parameter.named<"model"::"onnx__MatMul_19795"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19796 = #stream.parameter.named<"model"::"onnx__MatMul_19796"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19797 = #stream.parameter.named<"model"::"onnx__MatMul_19797"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19822 = #stream.parameter.named<"model"::"onnx__MatMul_19822"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19823 = #stream.parameter.named<"model"::"onnx__MatMul_19823"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19824 = #stream.parameter.named<"model"::"onnx__MatMul_19824"> 
: tensor<3072x12288xbf16> util.global private @onnx__MatMul_19825 = #stream.parameter.named<"model"::"onnx__MatMul_19825"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19826 = #stream.parameter.named<"model"::"onnx__MatMul_19826"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19827 = #stream.parameter.named<"model"::"onnx__MatMul_19827"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19828 = #stream.parameter.named<"model"::"onnx__MatMul_19828"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19829 = #stream.parameter.named<"model"::"onnx__MatMul_19829"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19830 = #stream.parameter.named<"model"::"onnx__MatMul_19830"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19843 = #stream.parameter.named<"model"::"onnx__MatMul_19843"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19844 = #stream.parameter.named<"model"::"onnx__MatMul_19844"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19845 = #stream.parameter.named<"model"::"onnx__MatMul_19845"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19857 = #stream.parameter.named<"model"::"onnx__MatMul_19857"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19858 = #stream.parameter.named<"model"::"onnx__MatMul_19858"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19859 = #stream.parameter.named<"model"::"onnx__MatMul_19859"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19860 = #stream.parameter.named<"model"::"onnx__MatMul_19860"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19861 = #stream.parameter.named<"model"::"onnx__MatMul_19861"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19862 = #stream.parameter.named<"model"::"onnx__MatMul_19862"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19863 = #stream.parameter.named<"model"::"onnx__MatMul_19863"> : tensor<3072x3072xbf16> util.global 
private @onnx__MatMul_19864 = #stream.parameter.named<"model"::"onnx__MatMul_19864"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19865 = #stream.parameter.named<"model"::"onnx__MatMul_19865"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19878 = #stream.parameter.named<"model"::"onnx__MatMul_19878"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19879 = #stream.parameter.named<"model"::"onnx__MatMul_19879"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19880 = #stream.parameter.named<"model"::"onnx__MatMul_19880"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19892 = #stream.parameter.named<"model"::"onnx__MatMul_19892"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19893 = #stream.parameter.named<"model"::"onnx__MatMul_19893"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19894 = #stream.parameter.named<"model"::"onnx__MatMul_19894"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19895 = #stream.parameter.named<"model"::"onnx__MatMul_19895"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19896 = #stream.parameter.named<"model"::"onnx__MatMul_19896"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19897 = #stream.parameter.named<"model"::"onnx__MatMul_19897"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19898 = #stream.parameter.named<"model"::"onnx__MatMul_19898"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19899 = #stream.parameter.named<"model"::"onnx__MatMul_19899"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19900 = #stream.parameter.named<"model"::"onnx__MatMul_19900"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19913 = #stream.parameter.named<"model"::"onnx__MatMul_19913"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19914 = #stream.parameter.named<"model"::"onnx__MatMul_19914"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19915 = 
#stream.parameter.named<"model"::"onnx__MatMul_19915"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19927 = #stream.parameter.named<"model"::"onnx__MatMul_19927"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19928 = #stream.parameter.named<"model"::"onnx__MatMul_19928"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19929 = #stream.parameter.named<"model"::"onnx__MatMul_19929"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19930 = #stream.parameter.named<"model"::"onnx__MatMul_19930"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19931 = #stream.parameter.named<"model"::"onnx__MatMul_19931"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19932 = #stream.parameter.named<"model"::"onnx__MatMul_19932"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19933 = #stream.parameter.named<"model"::"onnx__MatMul_19933"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19934 = #stream.parameter.named<"model"::"onnx__MatMul_19934"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19935 = #stream.parameter.named<"model"::"onnx__MatMul_19935"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19948 = #stream.parameter.named<"model"::"onnx__MatMul_19948"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19949 = #stream.parameter.named<"model"::"onnx__MatMul_19949"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19950 = #stream.parameter.named<"model"::"onnx__MatMul_19950"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19962 = #stream.parameter.named<"model"::"onnx__MatMul_19962"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19963 = #stream.parameter.named<"model"::"onnx__MatMul_19963"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19964 = #stream.parameter.named<"model"::"onnx__MatMul_19964"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19965 = 
#stream.parameter.named<"model"::"onnx__MatMul_19965"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19966 = #stream.parameter.named<"model"::"onnx__MatMul_19966"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_19967 = #stream.parameter.named<"model"::"onnx__MatMul_19967"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_19968 = #stream.parameter.named<"model"::"onnx__MatMul_19968"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19969 = #stream.parameter.named<"model"::"onnx__MatMul_19969"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19970 = #stream.parameter.named<"model"::"onnx__MatMul_19970"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19983 = #stream.parameter.named<"model"::"onnx__MatMul_19983"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19984 = #stream.parameter.named<"model"::"onnx__MatMul_19984"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19985 = #stream.parameter.named<"model"::"onnx__MatMul_19985"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19997 = #stream.parameter.named<"model"::"onnx__MatMul_19997"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19998 = #stream.parameter.named<"model"::"onnx__MatMul_19998"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_19999 = #stream.parameter.named<"model"::"onnx__MatMul_19999"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20000 = #stream.parameter.named<"model"::"onnx__MatMul_20000"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20001 = #stream.parameter.named<"model"::"onnx__MatMul_20001"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20002 = #stream.parameter.named<"model"::"onnx__MatMul_20002"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20003 = #stream.parameter.named<"model"::"onnx__MatMul_20003"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20004 = 
#stream.parameter.named<"model"::"onnx__MatMul_20004"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20005 = #stream.parameter.named<"model"::"onnx__MatMul_20005"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20018 = #stream.parameter.named<"model"::"onnx__MatMul_20018"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20019 = #stream.parameter.named<"model"::"onnx__MatMul_20019"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20020 = #stream.parameter.named<"model"::"onnx__MatMul_20020"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20032 = #stream.parameter.named<"model"::"onnx__MatMul_20032"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20033 = #stream.parameter.named<"model"::"onnx__MatMul_20033"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20034 = #stream.parameter.named<"model"::"onnx__MatMul_20034"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20035 = #stream.parameter.named<"model"::"onnx__MatMul_20035"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20036 = #stream.parameter.named<"model"::"onnx__MatMul_20036"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20037 = #stream.parameter.named<"model"::"onnx__MatMul_20037"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20038 = #stream.parameter.named<"model"::"onnx__MatMul_20038"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20039 = #stream.parameter.named<"model"::"onnx__MatMul_20039"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20040 = #stream.parameter.named<"model"::"onnx__MatMul_20040"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20053 = #stream.parameter.named<"model"::"onnx__MatMul_20053"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20054 = #stream.parameter.named<"model"::"onnx__MatMul_20054"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20055 = 
#stream.parameter.named<"model"::"onnx__MatMul_20055"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20067 = #stream.parameter.named<"model"::"onnx__MatMul_20067"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20068 = #stream.parameter.named<"model"::"onnx__MatMul_20068"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20069 = #stream.parameter.named<"model"::"onnx__MatMul_20069"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20070 = #stream.parameter.named<"model"::"onnx__MatMul_20070"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20071 = #stream.parameter.named<"model"::"onnx__MatMul_20071"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20072 = #stream.parameter.named<"model"::"onnx__MatMul_20072"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20073 = #stream.parameter.named<"model"::"onnx__MatMul_20073"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20074 = #stream.parameter.named<"model"::"onnx__MatMul_20074"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20075 = #stream.parameter.named<"model"::"onnx__MatMul_20075"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20088 = #stream.parameter.named<"model"::"onnx__MatMul_20088"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20089 = #stream.parameter.named<"model"::"onnx__MatMul_20089"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20090 = #stream.parameter.named<"model"::"onnx__MatMul_20090"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20102 = #stream.parameter.named<"model"::"onnx__MatMul_20102"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20103 = #stream.parameter.named<"model"::"onnx__MatMul_20103"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20104 = #stream.parameter.named<"model"::"onnx__MatMul_20104"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20105 = 
#stream.parameter.named<"model"::"onnx__MatMul_20105"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20106 = #stream.parameter.named<"model"::"onnx__MatMul_20106"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20107 = #stream.parameter.named<"model"::"onnx__MatMul_20107"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20108 = #stream.parameter.named<"model"::"onnx__MatMul_20108"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20109 = #stream.parameter.named<"model"::"onnx__MatMul_20109"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20110 = #stream.parameter.named<"model"::"onnx__MatMul_20110"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20123 = #stream.parameter.named<"model"::"onnx__MatMul_20123"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20124 = #stream.parameter.named<"model"::"onnx__MatMul_20124"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20125 = #stream.parameter.named<"model"::"onnx__MatMul_20125"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20137 = #stream.parameter.named<"model"::"onnx__MatMul_20137"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20138 = #stream.parameter.named<"model"::"onnx__MatMul_20138"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20139 = #stream.parameter.named<"model"::"onnx__MatMul_20139"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20140 = #stream.parameter.named<"model"::"onnx__MatMul_20140"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20141 = #stream.parameter.named<"model"::"onnx__MatMul_20141"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20142 = #stream.parameter.named<"model"::"onnx__MatMul_20142"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20143 = #stream.parameter.named<"model"::"onnx__MatMul_20143"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20144 = 
#stream.parameter.named<"model"::"onnx__MatMul_20144"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20145 = #stream.parameter.named<"model"::"onnx__MatMul_20145"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20158 = #stream.parameter.named<"model"::"onnx__MatMul_20158"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20159 = #stream.parameter.named<"model"::"onnx__MatMul_20159"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20160 = #stream.parameter.named<"model"::"onnx__MatMul_20160"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20172 = #stream.parameter.named<"model"::"onnx__MatMul_20172"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20173 = #stream.parameter.named<"model"::"onnx__MatMul_20173"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20174 = #stream.parameter.named<"model"::"onnx__MatMul_20174"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20175 = #stream.parameter.named<"model"::"onnx__MatMul_20175"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20176 = #stream.parameter.named<"model"::"onnx__MatMul_20176"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20177 = #stream.parameter.named<"model"::"onnx__MatMul_20177"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20178 = #stream.parameter.named<"model"::"onnx__MatMul_20178"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20179 = #stream.parameter.named<"model"::"onnx__MatMul_20179"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20180 = #stream.parameter.named<"model"::"onnx__MatMul_20180"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20193 = #stream.parameter.named<"model"::"onnx__MatMul_20193"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20194 = #stream.parameter.named<"model"::"onnx__MatMul_20194"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20195 = 
#stream.parameter.named<"model"::"onnx__MatMul_20195"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20207 = #stream.parameter.named<"model"::"onnx__MatMul_20207"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20208 = #stream.parameter.named<"model"::"onnx__MatMul_20208"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20209 = #stream.parameter.named<"model"::"onnx__MatMul_20209"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20210 = #stream.parameter.named<"model"::"onnx__MatMul_20210"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20211 = #stream.parameter.named<"model"::"onnx__MatMul_20211"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20212 = #stream.parameter.named<"model"::"onnx__MatMul_20212"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20213 = #stream.parameter.named<"model"::"onnx__MatMul_20213"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20214 = #stream.parameter.named<"model"::"onnx__MatMul_20214"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20215 = #stream.parameter.named<"model"::"onnx__MatMul_20215"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20228 = #stream.parameter.named<"model"::"onnx__MatMul_20228"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20229 = #stream.parameter.named<"model"::"onnx__MatMul_20229"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20230 = #stream.parameter.named<"model"::"onnx__MatMul_20230"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20242 = #stream.parameter.named<"model"::"onnx__MatMul_20242"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20243 = #stream.parameter.named<"model"::"onnx__MatMul_20243"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20244 = #stream.parameter.named<"model"::"onnx__MatMul_20244"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20245 = 
#stream.parameter.named<"model"::"onnx__MatMul_20245"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20246 = #stream.parameter.named<"model"::"onnx__MatMul_20246"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20247 = #stream.parameter.named<"model"::"onnx__MatMul_20247"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20248 = #stream.parameter.named<"model"::"onnx__MatMul_20248"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20249 = #stream.parameter.named<"model"::"onnx__MatMul_20249"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20250 = #stream.parameter.named<"model"::"onnx__MatMul_20250"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20263 = #stream.parameter.named<"model"::"onnx__MatMul_20263"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20264 = #stream.parameter.named<"model"::"onnx__MatMul_20264"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20265 = #stream.parameter.named<"model"::"onnx__MatMul_20265"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20277 = #stream.parameter.named<"model"::"onnx__MatMul_20277"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20278 = #stream.parameter.named<"model"::"onnx__MatMul_20278"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20279 = #stream.parameter.named<"model"::"onnx__MatMul_20279"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20280 = #stream.parameter.named<"model"::"onnx__MatMul_20280"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20281 = #stream.parameter.named<"model"::"onnx__MatMul_20281"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20282 = #stream.parameter.named<"model"::"onnx__MatMul_20282"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20283 = #stream.parameter.named<"model"::"onnx__MatMul_20283"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20284 = 
#stream.parameter.named<"model"::"onnx__MatMul_20284"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20285 = #stream.parameter.named<"model"::"onnx__MatMul_20285"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20298 = #stream.parameter.named<"model"::"onnx__MatMul_20298"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20299 = #stream.parameter.named<"model"::"onnx__MatMul_20299"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20300 = #stream.parameter.named<"model"::"onnx__MatMul_20300"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20312 = #stream.parameter.named<"model"::"onnx__MatMul_20312"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20313 = #stream.parameter.named<"model"::"onnx__MatMul_20313"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20314 = #stream.parameter.named<"model"::"onnx__MatMul_20314"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20315 = #stream.parameter.named<"model"::"onnx__MatMul_20315"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20316 = #stream.parameter.named<"model"::"onnx__MatMul_20316"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20317 = #stream.parameter.named<"model"::"onnx__MatMul_20317"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20318 = #stream.parameter.named<"model"::"onnx__MatMul_20318"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20319 = #stream.parameter.named<"model"::"onnx__MatMul_20319"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20320 = #stream.parameter.named<"model"::"onnx__MatMul_20320"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20333 = #stream.parameter.named<"model"::"onnx__MatMul_20333"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20334 = #stream.parameter.named<"model"::"onnx__MatMul_20334"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20335 = 
#stream.parameter.named<"model"::"onnx__MatMul_20335"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20347 = #stream.parameter.named<"model"::"onnx__MatMul_20347"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20348 = #stream.parameter.named<"model"::"onnx__MatMul_20348"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20349 = #stream.parameter.named<"model"::"onnx__MatMul_20349"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20350 = #stream.parameter.named<"model"::"onnx__MatMul_20350"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20351 = #stream.parameter.named<"model"::"onnx__MatMul_20351"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20352 = #stream.parameter.named<"model"::"onnx__MatMul_20352"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20353 = #stream.parameter.named<"model"::"onnx__MatMul_20353"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20354 = #stream.parameter.named<"model"::"onnx__MatMul_20354"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20355 = #stream.parameter.named<"model"::"onnx__MatMul_20355"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20368 = #stream.parameter.named<"model"::"onnx__MatMul_20368"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20369 = #stream.parameter.named<"model"::"onnx__MatMul_20369"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20370 = #stream.parameter.named<"model"::"onnx__MatMul_20370"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20382 = #stream.parameter.named<"model"::"onnx__MatMul_20382"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20383 = #stream.parameter.named<"model"::"onnx__MatMul_20383"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20384 = #stream.parameter.named<"model"::"onnx__MatMul_20384"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20385 = 
#stream.parameter.named<"model"::"onnx__MatMul_20385"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20386 = #stream.parameter.named<"model"::"onnx__MatMul_20386"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20387 = #stream.parameter.named<"model"::"onnx__MatMul_20387"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20388 = #stream.parameter.named<"model"::"onnx__MatMul_20388"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20389 = #stream.parameter.named<"model"::"onnx__MatMul_20389"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20390 = #stream.parameter.named<"model"::"onnx__MatMul_20390"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20403 = #stream.parameter.named<"model"::"onnx__MatMul_20403"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20404 = #stream.parameter.named<"model"::"onnx__MatMul_20404"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20405 = #stream.parameter.named<"model"::"onnx__MatMul_20405"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20417 = #stream.parameter.named<"model"::"onnx__MatMul_20417"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20418 = #stream.parameter.named<"model"::"onnx__MatMul_20418"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20419 = #stream.parameter.named<"model"::"onnx__MatMul_20419"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20420 = #stream.parameter.named<"model"::"onnx__MatMul_20420"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20421 = #stream.parameter.named<"model"::"onnx__MatMul_20421"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20422 = #stream.parameter.named<"model"::"onnx__MatMul_20422"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20423 = #stream.parameter.named<"model"::"onnx__MatMul_20423"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20424 = 
#stream.parameter.named<"model"::"onnx__MatMul_20424"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20425 = #stream.parameter.named<"model"::"onnx__MatMul_20425"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20438 = #stream.parameter.named<"model"::"onnx__MatMul_20438"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20439 = #stream.parameter.named<"model"::"onnx__MatMul_20439"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20440 = #stream.parameter.named<"model"::"onnx__MatMul_20440"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20452 = #stream.parameter.named<"model"::"onnx__MatMul_20452"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20453 = #stream.parameter.named<"model"::"onnx__MatMul_20453"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20454 = #stream.parameter.named<"model"::"onnx__MatMul_20454"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20455 = #stream.parameter.named<"model"::"onnx__MatMul_20455"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20456 = #stream.parameter.named<"model"::"onnx__MatMul_20456"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20457 = #stream.parameter.named<"model"::"onnx__MatMul_20457"> : tensor<12288x3072xbf16> util.global private @onnx__MatMul_20458 = #stream.parameter.named<"model"::"onnx__MatMul_20458"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20459 = #stream.parameter.named<"model"::"onnx__MatMul_20459"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20460 = #stream.parameter.named<"model"::"onnx__MatMul_20460"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20461 = #stream.parameter.named<"model"::"onnx__MatMul_20461"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20473 = #stream.parameter.named<"model"::"onnx__MatMul_20473"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20474 = 
#stream.parameter.named<"model"::"onnx__MatMul_20474"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20475 = #stream.parameter.named<"model"::"onnx__MatMul_20475"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20476 = #stream.parameter.named<"model"::"onnx__MatMul_20476"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20477 = #stream.parameter.named<"model"::"onnx__MatMul_20477"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20489 = #stream.parameter.named<"model"::"onnx__MatMul_20489"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20490 = #stream.parameter.named<"model"::"onnx__MatMul_20490"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20491 = #stream.parameter.named<"model"::"onnx__MatMul_20491"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20492 = #stream.parameter.named<"model"::"onnx__MatMul_20492"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20493 = #stream.parameter.named<"model"::"onnx__MatMul_20493"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20505 = #stream.parameter.named<"model"::"onnx__MatMul_20505"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20506 = #stream.parameter.named<"model"::"onnx__MatMul_20506"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20507 = #stream.parameter.named<"model"::"onnx__MatMul_20507"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20508 = #stream.parameter.named<"model"::"onnx__MatMul_20508"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20509 = #stream.parameter.named<"model"::"onnx__MatMul_20509"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20521 = #stream.parameter.named<"model"::"onnx__MatMul_20521"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20522 = #stream.parameter.named<"model"::"onnx__MatMul_20522"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20523 = 
#stream.parameter.named<"model"::"onnx__MatMul_20523"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20524 = #stream.parameter.named<"model"::"onnx__MatMul_20524"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20525 = #stream.parameter.named<"model"::"onnx__MatMul_20525"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20537 = #stream.parameter.named<"model"::"onnx__MatMul_20537"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20538 = #stream.parameter.named<"model"::"onnx__MatMul_20538"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20539 = #stream.parameter.named<"model"::"onnx__MatMul_20539"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20540 = #stream.parameter.named<"model"::"onnx__MatMul_20540"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20541 = #stream.parameter.named<"model"::"onnx__MatMul_20541"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20553 = #stream.parameter.named<"model"::"onnx__MatMul_20553"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20554 = #stream.parameter.named<"model"::"onnx__MatMul_20554"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20555 = #stream.parameter.named<"model"::"onnx__MatMul_20555"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20556 = #stream.parameter.named<"model"::"onnx__MatMul_20556"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20557 = #stream.parameter.named<"model"::"onnx__MatMul_20557"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20569 = #stream.parameter.named<"model"::"onnx__MatMul_20569"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20570 = #stream.parameter.named<"model"::"onnx__MatMul_20570"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20571 = #stream.parameter.named<"model"::"onnx__MatMul_20571"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20572 = 
#stream.parameter.named<"model"::"onnx__MatMul_20572"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20573 = #stream.parameter.named<"model"::"onnx__MatMul_20573"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20585 = #stream.parameter.named<"model"::"onnx__MatMul_20585"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20586 = #stream.parameter.named<"model"::"onnx__MatMul_20586"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20587 = #stream.parameter.named<"model"::"onnx__MatMul_20587"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20588 = #stream.parameter.named<"model"::"onnx__MatMul_20588"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20589 = #stream.parameter.named<"model"::"onnx__MatMul_20589"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20601 = #stream.parameter.named<"model"::"onnx__MatMul_20601"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20602 = #stream.parameter.named<"model"::"onnx__MatMul_20602"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20603 = #stream.parameter.named<"model"::"onnx__MatMul_20603"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20604 = #stream.parameter.named<"model"::"onnx__MatMul_20604"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20605 = #stream.parameter.named<"model"::"onnx__MatMul_20605"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20617 = #stream.parameter.named<"model"::"onnx__MatMul_20617"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20618 = #stream.parameter.named<"model"::"onnx__MatMul_20618"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20619 = #stream.parameter.named<"model"::"onnx__MatMul_20619"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20620 = #stream.parameter.named<"model"::"onnx__MatMul_20620"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20621 = 
#stream.parameter.named<"model"::"onnx__MatMul_20621"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20633 = #stream.parameter.named<"model"::"onnx__MatMul_20633"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20634 = #stream.parameter.named<"model"::"onnx__MatMul_20634"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20635 = #stream.parameter.named<"model"::"onnx__MatMul_20635"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20636 = #stream.parameter.named<"model"::"onnx__MatMul_20636"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20637 = #stream.parameter.named<"model"::"onnx__MatMul_20637"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20649 = #stream.parameter.named<"model"::"onnx__MatMul_20649"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20650 = #stream.parameter.named<"model"::"onnx__MatMul_20650"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20651 = #stream.parameter.named<"model"::"onnx__MatMul_20651"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20652 = #stream.parameter.named<"model"::"onnx__MatMul_20652"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20653 = #stream.parameter.named<"model"::"onnx__MatMul_20653"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20665 = #stream.parameter.named<"model"::"onnx__MatMul_20665"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20666 = #stream.parameter.named<"model"::"onnx__MatMul_20666"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20667 = #stream.parameter.named<"model"::"onnx__MatMul_20667"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20668 = #stream.parameter.named<"model"::"onnx__MatMul_20668"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20669 = #stream.parameter.named<"model"::"onnx__MatMul_20669"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20681 = 
#stream.parameter.named<"model"::"onnx__MatMul_20681"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20682 = #stream.parameter.named<"model"::"onnx__MatMul_20682"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20683 = #stream.parameter.named<"model"::"onnx__MatMul_20683"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20684 = #stream.parameter.named<"model"::"onnx__MatMul_20684"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20685 = #stream.parameter.named<"model"::"onnx__MatMul_20685"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20697 = #stream.parameter.named<"model"::"onnx__MatMul_20697"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20698 = #stream.parameter.named<"model"::"onnx__MatMul_20698"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20699 = #stream.parameter.named<"model"::"onnx__MatMul_20699"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20700 = #stream.parameter.named<"model"::"onnx__MatMul_20700"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20701 = #stream.parameter.named<"model"::"onnx__MatMul_20701"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20713 = #stream.parameter.named<"model"::"onnx__MatMul_20713"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20714 = #stream.parameter.named<"model"::"onnx__MatMul_20714"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20715 = #stream.parameter.named<"model"::"onnx__MatMul_20715"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20716 = #stream.parameter.named<"model"::"onnx__MatMul_20716"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20717 = #stream.parameter.named<"model"::"onnx__MatMul_20717"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20729 = #stream.parameter.named<"model"::"onnx__MatMul_20729"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20730 = 
#stream.parameter.named<"model"::"onnx__MatMul_20730"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20731 = #stream.parameter.named<"model"::"onnx__MatMul_20731"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20732 = #stream.parameter.named<"model"::"onnx__MatMul_20732"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20733 = #stream.parameter.named<"model"::"onnx__MatMul_20733"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20745 = #stream.parameter.named<"model"::"onnx__MatMul_20745"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20746 = #stream.parameter.named<"model"::"onnx__MatMul_20746"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20747 = #stream.parameter.named<"model"::"onnx__MatMul_20747"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20748 = #stream.parameter.named<"model"::"onnx__MatMul_20748"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20749 = #stream.parameter.named<"model"::"onnx__MatMul_20749"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20761 = #stream.parameter.named<"model"::"onnx__MatMul_20761"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20762 = #stream.parameter.named<"model"::"onnx__MatMul_20762"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20763 = #stream.parameter.named<"model"::"onnx__MatMul_20763"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20764 = #stream.parameter.named<"model"::"onnx__MatMul_20764"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20765 = #stream.parameter.named<"model"::"onnx__MatMul_20765"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20777 = #stream.parameter.named<"model"::"onnx__MatMul_20777"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20778 = #stream.parameter.named<"model"::"onnx__MatMul_20778"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20779 = 
#stream.parameter.named<"model"::"onnx__MatMul_20779"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20780 = #stream.parameter.named<"model"::"onnx__MatMul_20780"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20781 = #stream.parameter.named<"model"::"onnx__MatMul_20781"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20793 = #stream.parameter.named<"model"::"onnx__MatMul_20793"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20794 = #stream.parameter.named<"model"::"onnx__MatMul_20794"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20795 = #stream.parameter.named<"model"::"onnx__MatMul_20795"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20796 = #stream.parameter.named<"model"::"onnx__MatMul_20796"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20797 = #stream.parameter.named<"model"::"onnx__MatMul_20797"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20809 = #stream.parameter.named<"model"::"onnx__MatMul_20809"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20810 = #stream.parameter.named<"model"::"onnx__MatMul_20810"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20811 = #stream.parameter.named<"model"::"onnx__MatMul_20811"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20812 = #stream.parameter.named<"model"::"onnx__MatMul_20812"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20813 = #stream.parameter.named<"model"::"onnx__MatMul_20813"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20825 = #stream.parameter.named<"model"::"onnx__MatMul_20825"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20826 = #stream.parameter.named<"model"::"onnx__MatMul_20826"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20827 = #stream.parameter.named<"model"::"onnx__MatMul_20827"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20828 = 
#stream.parameter.named<"model"::"onnx__MatMul_20828"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20829 = #stream.parameter.named<"model"::"onnx__MatMul_20829"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20841 = #stream.parameter.named<"model"::"onnx__MatMul_20841"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20842 = #stream.parameter.named<"model"::"onnx__MatMul_20842"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20843 = #stream.parameter.named<"model"::"onnx__MatMul_20843"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20844 = #stream.parameter.named<"model"::"onnx__MatMul_20844"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20845 = #stream.parameter.named<"model"::"onnx__MatMul_20845"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20857 = #stream.parameter.named<"model"::"onnx__MatMul_20857"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20858 = #stream.parameter.named<"model"::"onnx__MatMul_20858"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20859 = #stream.parameter.named<"model"::"onnx__MatMul_20859"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20860 = #stream.parameter.named<"model"::"onnx__MatMul_20860"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20861 = #stream.parameter.named<"model"::"onnx__MatMul_20861"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20873 = #stream.parameter.named<"model"::"onnx__MatMul_20873"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20874 = #stream.parameter.named<"model"::"onnx__MatMul_20874"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20875 = #stream.parameter.named<"model"::"onnx__MatMul_20875"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20876 = #stream.parameter.named<"model"::"onnx__MatMul_20876"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20877 = 
#stream.parameter.named<"model"::"onnx__MatMul_20877"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20889 = #stream.parameter.named<"model"::"onnx__MatMul_20889"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20890 = #stream.parameter.named<"model"::"onnx__MatMul_20890"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20891 = #stream.parameter.named<"model"::"onnx__MatMul_20891"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20892 = #stream.parameter.named<"model"::"onnx__MatMul_20892"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20893 = #stream.parameter.named<"model"::"onnx__MatMul_20893"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20905 = #stream.parameter.named<"model"::"onnx__MatMul_20905"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20906 = #stream.parameter.named<"model"::"onnx__MatMul_20906"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20907 = #stream.parameter.named<"model"::"onnx__MatMul_20907"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20908 = #stream.parameter.named<"model"::"onnx__MatMul_20908"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20909 = #stream.parameter.named<"model"::"onnx__MatMul_20909"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20921 = #stream.parameter.named<"model"::"onnx__MatMul_20921"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20922 = #stream.parameter.named<"model"::"onnx__MatMul_20922"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20923 = #stream.parameter.named<"model"::"onnx__MatMul_20923"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20924 = #stream.parameter.named<"model"::"onnx__MatMul_20924"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20925 = #stream.parameter.named<"model"::"onnx__MatMul_20925"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20937 = 
#stream.parameter.named<"model"::"onnx__MatMul_20937"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20938 = #stream.parameter.named<"model"::"onnx__MatMul_20938"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20939 = #stream.parameter.named<"model"::"onnx__MatMul_20939"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20940 = #stream.parameter.named<"model"::"onnx__MatMul_20940"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20941 = #stream.parameter.named<"model"::"onnx__MatMul_20941"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20953 = #stream.parameter.named<"model"::"onnx__MatMul_20953"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20954 = #stream.parameter.named<"model"::"onnx__MatMul_20954"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20955 = #stream.parameter.named<"model"::"onnx__MatMul_20955"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20956 = #stream.parameter.named<"model"::"onnx__MatMul_20956"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20957 = #stream.parameter.named<"model"::"onnx__MatMul_20957"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20969 = #stream.parameter.named<"model"::"onnx__MatMul_20969"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20970 = #stream.parameter.named<"model"::"onnx__MatMul_20970"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20971 = #stream.parameter.named<"model"::"onnx__MatMul_20971"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20972 = #stream.parameter.named<"model"::"onnx__MatMul_20972"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20973 = #stream.parameter.named<"model"::"onnx__MatMul_20973"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20985 = #stream.parameter.named<"model"::"onnx__MatMul_20985"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_20986 = 
#stream.parameter.named<"model"::"onnx__MatMul_20986"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_20987 = #stream.parameter.named<"model"::"onnx__MatMul_20987"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20988 = #stream.parameter.named<"model"::"onnx__MatMul_20988"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_20989 = #stream.parameter.named<"model"::"onnx__MatMul_20989"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21001 = #stream.parameter.named<"model"::"onnx__MatMul_21001"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_21002 = #stream.parameter.named<"model"::"onnx__MatMul_21002"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_21003 = #stream.parameter.named<"model"::"onnx__MatMul_21003"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21004 = #stream.parameter.named<"model"::"onnx__MatMul_21004"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21005 = #stream.parameter.named<"model"::"onnx__MatMul_21005"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21017 = #stream.parameter.named<"model"::"onnx__MatMul_21017"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_21018 = #stream.parameter.named<"model"::"onnx__MatMul_21018"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_21019 = #stream.parameter.named<"model"::"onnx__MatMul_21019"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21020 = #stream.parameter.named<"model"::"onnx__MatMul_21020"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21021 = #stream.parameter.named<"model"::"onnx__MatMul_21021"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21033 = #stream.parameter.named<"model"::"onnx__MatMul_21033"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_21034 = #stream.parameter.named<"model"::"onnx__MatMul_21034"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_21035 = 
#stream.parameter.named<"model"::"onnx__MatMul_21035"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21036 = #stream.parameter.named<"model"::"onnx__MatMul_21036"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21037 = #stream.parameter.named<"model"::"onnx__MatMul_21037"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21049 = #stream.parameter.named<"model"::"onnx__MatMul_21049"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_21050 = #stream.parameter.named<"model"::"onnx__MatMul_21050"> : tensor<3072x12288xbf16> util.global private @onnx__MatMul_21051 = #stream.parameter.named<"model"::"onnx__MatMul_21051"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21052 = #stream.parameter.named<"model"::"onnx__MatMul_21052"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21053 = #stream.parameter.named<"model"::"onnx__MatMul_21053"> : tensor<3072x3072xbf16> util.global private @onnx__MatMul_21065 = #stream.parameter.named<"model"::"onnx__MatMul_21065"> : tensor<15360x3072xbf16> util.global private @onnx__MatMul_21069 = #stream.parameter.named<"model"::"onnx__MatMul_21069"> : tensor<3072x64xbf16> util.global private @"/time_text_embed/time_proj/Constant_3_attr__value" = #stream.parameter.named<"model"::"/time_text_embed/time_proj/Constant_3_attr__value"> : tensor<1x128xf32> util.global private @"/time_text_embed/time_proj_1/Constant_1_attr__value" = #stream.parameter.named<"model"::"/time_text_embed/time_proj_1/Constant_1_attr__value"> : tensor<1x128xf32> util.global private @"/transformer_blocks.0/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.0/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.0/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.0/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private 
@"/transformer_blocks.0/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.0/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.0/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.0/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.0/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.0/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.0/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.0/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.0/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.0/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.0/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.0/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.1/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.1/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.1/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.1/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.1/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.1/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.1/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.1/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private 
@"/transformer_blocks.1/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.1/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.1/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.1/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.1/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.1/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.1/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.1/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.2/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.2/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.2/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.2/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.2/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.2/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.2/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.2/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.2/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.2/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.2/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.2/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.2/norm2_context/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.2/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.2/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.2/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.3/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.3/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.3/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.3/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.3/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.3/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.3/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.3/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.3/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.3/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.3/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.3/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.3/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.3/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.3/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.3/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.4/norm1/norm/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.4/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.4/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.4/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.4/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.4/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.4/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.4/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.4/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.4/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.4/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.4/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.4/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.4/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.4/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.4/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.5/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.5/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.5/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.5/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.5/norm1_context/norm/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.5/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.5/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.5/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.5/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.5/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.5/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.5/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.5/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.5/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.5/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.5/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.6/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.6/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.6/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.6/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.6/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.6/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.6/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.6/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.6/norm2/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.6/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.6/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.6/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.6/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.6/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.6/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.6/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.7/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.7/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.7/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.7/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.7/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.7/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.7/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.7/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.7/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.7/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.7/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.7/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.7/norm2_context/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.7/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.7/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.7/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.8/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.8/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.8/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.8/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.8/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.8/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.8/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.8/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.8/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.8/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.8/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.8/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.8/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.8/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.8/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.8/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.9/norm1/norm/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.9/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.9/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.9/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.9/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.9/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.9/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.9/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.9/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.9/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.9/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.9/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.9/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.9/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.9/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.9/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.10/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.10/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.10/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.10/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.10/norm1_context/norm/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.10/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.10/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.10/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.10/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.10/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.10/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.10/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.10/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.10/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.10/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.10/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.11/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.11/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.11/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.11/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.11/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.11/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.11/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.11/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.11/norm2/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.11/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.11/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.11/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.11/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.11/norm2_context/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.11/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.11/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.12/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.12/norm1/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.12/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.12/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.12/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.12/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.12/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.12/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.12/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.12/norm2/Constant_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.12/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.12/norm2/Constant_1_attr__value"> : tensor<3072xbf16> util.global private @"/transformer_blocks.12/norm2_context/Constant_attr__value" = 
#stream.parameter.named<"model"::"/transformer_blocks.12/norm2_context/Constant_attr__value"> : tensor<3072xbf16>
  // The line above is the initializer and type of a util.global whose
  // `util.global private @... =` header sits at the end of the previous line.

  // Norm-related constant globals for /transformer_blocks.12 .. /transformer_blocks.18.
  // Every declaration in this run has the same shape:
  //   * private visibility,
  //   * a quoted symbol name (the name contains '/' so it must be quoted),
  //   * an initializer that refers to the named parameter with the identical
  //     key in the "model" scope,
  //   * type tensor<3072xbf16>.
  // Each block declares a `Constant` / `Constant_1` pair under norm1/norm,
  // norm1_context/norm, norm2 and norm2_context.
  // NOTE(review): the names look like exporter-generated ONNX Constant
  // attributes; what each constant of a pair holds is not visible here — confirm
  // against the exporter before relying on it.

  // /transformer_blocks.12 (remaining declaration)
  util.global private @"/transformer_blocks.12/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.12/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16>

  // /transformer_blocks.13
  util.global private @"/transformer_blocks.13/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.13/norm1/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.13/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.13/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.13/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.13/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.13/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.13/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.13/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.13/norm2/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.13/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.13/norm2/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.13/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.13/norm2_context/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.13/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.13/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16>

  // /transformer_blocks.14
  util.global private @"/transformer_blocks.14/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.14/norm1/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.14/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.14/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.14/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.14/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.14/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.14/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.14/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.14/norm2/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.14/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.14/norm2/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.14/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.14/norm2_context/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.14/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.14/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16>

  // /transformer_blocks.15
  util.global private @"/transformer_blocks.15/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.15/norm1/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.15/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.15/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.15/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.15/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.15/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.15/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.15/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.15/norm2/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.15/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.15/norm2/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.15/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.15/norm2_context/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.15/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.15/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16>

  // /transformer_blocks.16
  util.global private @"/transformer_blocks.16/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.16/norm1/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.16/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.16/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.16/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.16/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.16/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.16/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.16/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.16/norm2/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.16/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.16/norm2/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.16/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.16/norm2_context/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.16/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.16/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16>

  // /transformer_blocks.17
  util.global private @"/transformer_blocks.17/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.17/norm1/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.17/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.17/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.17/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.17/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.17/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.17/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.17/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.17/norm2/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.17/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.17/norm2/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.17/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.17/norm2_context/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.17/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.17/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16>

  // /transformer_blocks.18
  util.global private @"/transformer_blocks.18/norm1/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.18/norm1/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.18/norm1/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.18/norm1/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.18/norm1_context/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.18/norm1_context/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.18/norm1_context/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.18/norm1_context/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.18/norm2/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.18/norm2/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.18/norm2/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.18/norm2/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.18/norm2_context/Constant_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.18/norm2_context/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/transformer_blocks.18/norm2_context/Constant_1_attr__value" = #stream.parameter.named<"model"::"/transformer_blocks.18/norm2_context/Constant_1_attr__value"> : tensor<3072xbf16>

  // First /single_transformer_blocks declaration; its initializer and type
  // follow on the next line of the file.
  util.global private @"/single_transformer_blocks.0/norm/norm/Constant_attr__value" =
#stream.parameter.named<"model"::"/single_transformer_blocks.0/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  // The line above is the initializer and type of a util.global whose
  // `util.global private @... =` header sits at the end of the previous line.

  // Norm-related constant globals for /single_transformer_blocks.0 .. .37,
  // followed by /norm_out. Every declaration in this run has the same shape:
  //   * private visibility,
  //   * a quoted symbol name (the name contains '/' so it must be quoted),
  //   * an initializer that refers to the named parameter with the identical
  //     key in the "model" scope,
  //   * type tensor<3072xbf16>.
  // Each block contributes one `Constant` / `Constant_1` pair under norm/norm.
  // NOTE(review): the names look like exporter-generated ONNX Constant
  // attributes; what each constant of a pair holds is not visible here — confirm
  // against the exporter before relying on it.

  // /single_transformer_blocks.0 (remaining declaration)
  util.global private @"/single_transformer_blocks.0/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.0/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>

  // /single_transformer_blocks.1 .. .9
  util.global private @"/single_transformer_blocks.1/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.1/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.1/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.1/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.2/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.2/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.2/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.2/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.3/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.3/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.3/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.3/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.4/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.4/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.4/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.4/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.5/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.5/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.5/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.5/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.6/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.6/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.6/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.6/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.7/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.7/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.7/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.7/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.8/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.8/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.8/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.8/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.9/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.9/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.9/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.9/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>

  // /single_transformer_blocks.10 .. .19
  util.global private @"/single_transformer_blocks.10/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.10/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.10/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.10/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.11/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.11/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.11/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.11/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.12/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.12/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.12/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.12/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.13/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.13/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.13/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.13/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.14/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.14/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.14/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.14/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.15/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.15/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.15/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.15/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.16/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.16/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.16/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.16/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.17/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.17/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.17/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.17/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.18/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.18/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.18/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.18/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.19/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.19/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.19/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.19/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>

  // /single_transformer_blocks.20 .. .29
  util.global private @"/single_transformer_blocks.20/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.20/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.20/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.20/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.21/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.21/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.21/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.21/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.22/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.22/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.22/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.22/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.23/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.23/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.23/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.23/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.24/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.24/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.24/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.24/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.25/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.25/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.25/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.25/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.26/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.26/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.26/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.26/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.27/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.27/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.27/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.27/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.28/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.28/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.28/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.28/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.29/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.29/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.29/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.29/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>

  // /single_transformer_blocks.30 .. .37
  util.global private @"/single_transformer_blocks.30/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.30/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.30/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.30/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.31/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.31/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.31/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.31/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.32/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.32/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.32/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.32/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.33/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.33/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.33/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.33/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.34/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.34/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.34/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.34/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.35/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.35/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.35/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.35/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.36/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.36/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.36/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.36/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.37/norm/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.37/norm/norm/Constant_attr__value"> : tensor<3072xbf16>
  util.global private @"/single_transformer_blocks.37/norm/norm/Constant_1_attr__value" = #stream.parameter.named<"model"::"/single_transformer_blocks.37/norm/norm/Constant_1_attr__value"> : tensor<3072xbf16>

  // /norm_out
  util.global private @"/norm_out/norm/Constant_attr__value" = #stream.parameter.named<"model"::"/norm_out/norm/Constant_attr__value"> : tensor<3072xbf16>
  // Last declaration of this run; its initializer and type follow on the next
  // line of the file.
  util.global private @"/norm_out/norm/Constant_1_attr__value" =
#stream.parameter.named<"model"::"/norm_out/norm/Constant_1_attr__value"> : tensor<3072xbf16> func.func @main_graph(%arg0: !torch.vtensor<[1,4096,64],bf16>, %arg1: !torch.vtensor<[1,512,4096],bf16>, %arg2: !torch.vtensor<[1,768],bf16>, %arg3: !torch.vtensor<[1],bf16>, %arg4: !torch.vtensor<[4096,3],bf16>, %arg5: !torch.vtensor<[512,3],bf16>, %arg6: !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,64],bf16> attributes {torch.onnx_meta.ir_version = 8 : si64, torch.onnx_meta.opset_version = 17 : si64, torch.onnx_meta.producer_name = "pytorch", torch.onnx_meta.producer_version = "2.5.1"} { %time_text_embed.timestep_embedder.linear_1.weight = util.global.load @time_text_embed.timestep_embedder.linear_1.weight : tensor<3072x256xbf16> %0 = torch_c.from_builtin_tensor %time_text_embed.timestep_embedder.linear_1.weight : tensor<3072x256xbf16> -> !torch.vtensor<[3072,256],bf16> %time_text_embed.timestep_embedder.linear_1.bias = util.global.load @time_text_embed.timestep_embedder.linear_1.bias : tensor<3072xbf16> %1 = torch_c.from_builtin_tensor %time_text_embed.timestep_embedder.linear_1.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %time_text_embed.timestep_embedder.linear_2.weight = util.global.load @time_text_embed.timestep_embedder.linear_2.weight : tensor<3072x3072xbf16> %2 = torch_c.from_builtin_tensor %time_text_embed.timestep_embedder.linear_2.weight : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %time_text_embed.timestep_embedder.linear_2.bias = util.global.load @time_text_embed.timestep_embedder.linear_2.bias : tensor<3072xbf16> %3 = torch_c.from_builtin_tensor %time_text_embed.timestep_embedder.linear_2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %time_text_embed.guidance_embedder.linear_1.weight = util.global.load @time_text_embed.guidance_embedder.linear_1.weight : tensor<3072x256xbf16> %4 = torch_c.from_builtin_tensor %time_text_embed.guidance_embedder.linear_1.weight : tensor<3072x256xbf16> -> !torch.vtensor<[3072,256],bf16> 
%time_text_embed.guidance_embedder.linear_1.bias = util.global.load @time_text_embed.guidance_embedder.linear_1.bias : tensor<3072xbf16> %5 = torch_c.from_builtin_tensor %time_text_embed.guidance_embedder.linear_1.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %time_text_embed.guidance_embedder.linear_2.weight = util.global.load @time_text_embed.guidance_embedder.linear_2.weight : tensor<3072x3072xbf16> %6 = torch_c.from_builtin_tensor %time_text_embed.guidance_embedder.linear_2.weight : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %time_text_embed.guidance_embedder.linear_2.bias = util.global.load @time_text_embed.guidance_embedder.linear_2.bias : tensor<3072xbf16> %7 = torch_c.from_builtin_tensor %time_text_embed.guidance_embedder.linear_2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %time_text_embed.text_embedder.linear_1.weight = util.global.load @time_text_embed.text_embedder.linear_1.weight : tensor<3072x768xbf16> %8 = torch_c.from_builtin_tensor %time_text_embed.text_embedder.linear_1.weight : tensor<3072x768xbf16> -> !torch.vtensor<[3072,768],bf16> %time_text_embed.text_embedder.linear_1.bias = util.global.load @time_text_embed.text_embedder.linear_1.bias : tensor<3072xbf16> %9 = torch_c.from_builtin_tensor %time_text_embed.text_embedder.linear_1.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %time_text_embed.text_embedder.linear_2.weight = util.global.load @time_text_embed.text_embedder.linear_2.weight : tensor<3072x3072xbf16> %10 = torch_c.from_builtin_tensor %time_text_embed.text_embedder.linear_2.weight : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %time_text_embed.text_embedder.linear_2.bias = util.global.load @time_text_embed.text_embedder.linear_2.bias : tensor<3072xbf16> %11 = torch_c.from_builtin_tensor %time_text_embed.text_embedder.linear_2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %context_embedder.bias = util.global.load @context_embedder.bias : tensor<3072xbf16> %12 = 
torch_c.from_builtin_tensor %context_embedder.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %x_embedder.bias = util.global.load @x_embedder.bias : tensor<3072xbf16> %13 = torch_c.from_builtin_tensor %x_embedder.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.norm1.linear.weight = util.global.load @transformer_blocks.0.norm1.linear.weight : tensor<18432x3072xbf16> %14 = torch_c.from_builtin_tensor %transformer_blocks.0.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.0.norm1.linear.bias = util.global.load @transformer_blocks.0.norm1.linear.bias : tensor<18432xbf16> %15 = torch_c.from_builtin_tensor %transformer_blocks.0.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.0.norm1_context.linear.weight = util.global.load @transformer_blocks.0.norm1_context.linear.weight : tensor<18432x3072xbf16> %16 = torch_c.from_builtin_tensor %transformer_blocks.0.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.0.norm1_context.linear.bias = util.global.load @transformer_blocks.0.norm1_context.linear.bias : tensor<18432xbf16> %17 = torch_c.from_builtin_tensor %transformer_blocks.0.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.0.attn.norm_q.weight = util.global.load @transformer_blocks.0.attn.norm_q.weight : tensor<128xbf16> %18 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.0.attn.norm_k.weight = util.global.load @transformer_blocks.0.attn.norm_k.weight : tensor<128xbf16> %19 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.0.attn.to_q.bias = util.global.load @transformer_blocks.0.attn.to_q.bias : tensor<3072xbf16> %20 = torch_c.from_builtin_tensor 
%transformer_blocks.0.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.attn.to_k.bias = util.global.load @transformer_blocks.0.attn.to_k.bias : tensor<3072xbf16> %21 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.attn.to_v.bias = util.global.load @transformer_blocks.0.attn.to_v.bias : tensor<3072xbf16> %22 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.attn.add_k_proj.bias = util.global.load @transformer_blocks.0.attn.add_k_proj.bias : tensor<3072xbf16> %23 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.attn.add_v_proj.bias = util.global.load @transformer_blocks.0.attn.add_v_proj.bias : tensor<3072xbf16> %24 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.attn.add_q_proj.bias = util.global.load @transformer_blocks.0.attn.add_q_proj.bias : tensor<3072xbf16> %25 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.attn.to_out.0.bias = util.global.load @transformer_blocks.0.attn.to_out.0.bias : tensor<3072xbf16> %26 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.attn.to_add_out.bias = util.global.load @transformer_blocks.0.attn.to_add_out.bias : tensor<3072xbf16> %27 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.attn.norm_added_q.weight = util.global.load @transformer_blocks.0.attn.norm_added_q.weight : tensor<128xbf16> %28 = torch_c.from_builtin_tensor 
%transformer_blocks.0.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.0.attn.norm_added_k.weight = util.global.load @transformer_blocks.0.attn.norm_added_k.weight : tensor<128xbf16> %29 = torch_c.from_builtin_tensor %transformer_blocks.0.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.0.ff.net.0.proj.bias = util.global.load @transformer_blocks.0.ff.net.0.proj.bias : tensor<12288xbf16> %30 = torch_c.from_builtin_tensor %transformer_blocks.0.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.0.ff.net.2.bias = util.global.load @transformer_blocks.0.ff.net.2.bias : tensor<3072xbf16> %31 = torch_c.from_builtin_tensor %transformer_blocks.0.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.0.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.0.ff_context.net.0.proj.bias : tensor<12288xbf16> %32 = torch_c.from_builtin_tensor %transformer_blocks.0.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.0.ff_context.net.2.bias = util.global.load @transformer_blocks.0.ff_context.net.2.bias : tensor<3072xbf16> %33 = torch_c.from_builtin_tensor %transformer_blocks.0.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.norm1.linear.weight = util.global.load @transformer_blocks.1.norm1.linear.weight : tensor<18432x3072xbf16> %34 = torch_c.from_builtin_tensor %transformer_blocks.1.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.1.norm1.linear.bias = util.global.load @transformer_blocks.1.norm1.linear.bias : tensor<18432xbf16> %35 = torch_c.from_builtin_tensor %transformer_blocks.1.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.1.norm1_context.linear.weight = util.global.load 
@transformer_blocks.1.norm1_context.linear.weight : tensor<18432x3072xbf16> %36 = torch_c.from_builtin_tensor %transformer_blocks.1.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.1.norm1_context.linear.bias = util.global.load @transformer_blocks.1.norm1_context.linear.bias : tensor<18432xbf16> %37 = torch_c.from_builtin_tensor %transformer_blocks.1.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.1.attn.norm_q.weight = util.global.load @transformer_blocks.1.attn.norm_q.weight : tensor<128xbf16> %38 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.1.attn.norm_k.weight = util.global.load @transformer_blocks.1.attn.norm_k.weight : tensor<128xbf16> %39 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.1.attn.to_q.bias = util.global.load @transformer_blocks.1.attn.to_q.bias : tensor<3072xbf16> %40 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.attn.to_k.bias = util.global.load @transformer_blocks.1.attn.to_k.bias : tensor<3072xbf16> %41 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.attn.to_v.bias = util.global.load @transformer_blocks.1.attn.to_v.bias : tensor<3072xbf16> %42 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.attn.add_k_proj.bias = util.global.load @transformer_blocks.1.attn.add_k_proj.bias : tensor<3072xbf16> %43 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.attn.add_v_proj.bias = 
util.global.load @transformer_blocks.1.attn.add_v_proj.bias : tensor<3072xbf16> %44 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.attn.add_q_proj.bias = util.global.load @transformer_blocks.1.attn.add_q_proj.bias : tensor<3072xbf16> %45 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.attn.to_out.0.bias = util.global.load @transformer_blocks.1.attn.to_out.0.bias : tensor<3072xbf16> %46 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.attn.to_add_out.bias = util.global.load @transformer_blocks.1.attn.to_add_out.bias : tensor<3072xbf16> %47 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.1.attn.norm_added_q.weight = util.global.load @transformer_blocks.1.attn.norm_added_q.weight : tensor<128xbf16> %48 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.1.attn.norm_added_k.weight = util.global.load @transformer_blocks.1.attn.norm_added_k.weight : tensor<128xbf16> %49 = torch_c.from_builtin_tensor %transformer_blocks.1.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.1.ff.net.0.proj.bias = util.global.load @transformer_blocks.1.ff.net.0.proj.bias : tensor<12288xbf16> %50 = torch_c.from_builtin_tensor %transformer_blocks.1.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.1.ff.net.2.bias = util.global.load @transformer_blocks.1.ff.net.2.bias : tensor<3072xbf16> %51 = torch_c.from_builtin_tensor %transformer_blocks.1.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
// Parameter loads (results %52 through %90). Every pair below has the same shape:
//   1. util.global.load reads the module-level global @<name> as a builtin tensor.
//   2. torch_c.from_builtin_tensor converts that value to a !torch.vtensor with
//      the same static shape and bf16 element type, bound to a numbered result.
// SSA names are kept exactly as emitted; any uses of them lie outside this span.

// transformer_blocks.1: ff_context biases
%transformer_blocks.1.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.1.ff_context.net.0.proj.bias : tensor<12288xbf16>
%52 = torch_c.from_builtin_tensor %transformer_blocks.1.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.1.ff_context.net.2.bias = util.global.load @transformer_blocks.1.ff_context.net.2.bias : tensor<3072xbf16>
%53 = torch_c.from_builtin_tensor %transformer_blocks.1.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// transformer_blocks.2: norm1 / norm1_context linear weight and bias
%transformer_blocks.2.norm1.linear.weight = util.global.load @transformer_blocks.2.norm1.linear.weight : tensor<18432x3072xbf16>
%54 = torch_c.from_builtin_tensor %transformer_blocks.2.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.2.norm1.linear.bias = util.global.load @transformer_blocks.2.norm1.linear.bias : tensor<18432xbf16>
%55 = torch_c.from_builtin_tensor %transformer_blocks.2.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>
%transformer_blocks.2.norm1_context.linear.weight = util.global.load @transformer_blocks.2.norm1_context.linear.weight : tensor<18432x3072xbf16>
%56 = torch_c.from_builtin_tensor %transformer_blocks.2.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.2.norm1_context.linear.bias = util.global.load @transformer_blocks.2.norm1_context.linear.bias : tensor<18432xbf16>
%57 = torch_c.from_builtin_tensor %transformer_blocks.2.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>

// transformer_blocks.2: attn norm weights and projection biases
%transformer_blocks.2.attn.norm_q.weight = util.global.load @transformer_blocks.2.attn.norm_q.weight : tensor<128xbf16>
%58 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.2.attn.norm_k.weight = util.global.load @transformer_blocks.2.attn.norm_k.weight : tensor<128xbf16>
%59 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.2.attn.to_q.bias = util.global.load @transformer_blocks.2.attn.to_q.bias : tensor<3072xbf16>
%60 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.attn.to_k.bias = util.global.load @transformer_blocks.2.attn.to_k.bias : tensor<3072xbf16>
%61 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.attn.to_v.bias = util.global.load @transformer_blocks.2.attn.to_v.bias : tensor<3072xbf16>
%62 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.attn.add_k_proj.bias = util.global.load @transformer_blocks.2.attn.add_k_proj.bias : tensor<3072xbf16>
%63 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.attn.add_v_proj.bias = util.global.load @transformer_blocks.2.attn.add_v_proj.bias : tensor<3072xbf16>
%64 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.attn.add_q_proj.bias = util.global.load @transformer_blocks.2.attn.add_q_proj.bias : tensor<3072xbf16>
%65 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.attn.to_out.0.bias = util.global.load @transformer_blocks.2.attn.to_out.0.bias : tensor<3072xbf16>
%66 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.attn.to_add_out.bias = util.global.load @transformer_blocks.2.attn.to_add_out.bias : tensor<3072xbf16>
%67 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.attn.norm_added_q.weight = util.global.load @transformer_blocks.2.attn.norm_added_q.weight : tensor<128xbf16>
%68 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.2.attn.norm_added_k.weight = util.global.load @transformer_blocks.2.attn.norm_added_k.weight : tensor<128xbf16>
%69 = torch_c.from_builtin_tensor %transformer_blocks.2.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>

// transformer_blocks.2: ff / ff_context biases
%transformer_blocks.2.ff.net.0.proj.bias = util.global.load @transformer_blocks.2.ff.net.0.proj.bias : tensor<12288xbf16>
%70 = torch_c.from_builtin_tensor %transformer_blocks.2.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.2.ff.net.2.bias = util.global.load @transformer_blocks.2.ff.net.2.bias : tensor<3072xbf16>
%71 = torch_c.from_builtin_tensor %transformer_blocks.2.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.2.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.2.ff_context.net.0.proj.bias : tensor<12288xbf16>
%72 = torch_c.from_builtin_tensor %transformer_blocks.2.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.2.ff_context.net.2.bias = util.global.load @transformer_blocks.2.ff_context.net.2.bias : tensor<3072xbf16>
%73 = torch_c.from_builtin_tensor %transformer_blocks.2.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// transformer_blocks.3: norm1 / norm1_context linear weight and bias
%transformer_blocks.3.norm1.linear.weight = util.global.load @transformer_blocks.3.norm1.linear.weight : tensor<18432x3072xbf16>
%74 = torch_c.from_builtin_tensor %transformer_blocks.3.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.3.norm1.linear.bias = util.global.load @transformer_blocks.3.norm1.linear.bias : tensor<18432xbf16>
%75 = torch_c.from_builtin_tensor %transformer_blocks.3.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>
%transformer_blocks.3.norm1_context.linear.weight = util.global.load @transformer_blocks.3.norm1_context.linear.weight : tensor<18432x3072xbf16>
%76 = torch_c.from_builtin_tensor %transformer_blocks.3.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.3.norm1_context.linear.bias = util.global.load @transformer_blocks.3.norm1_context.linear.bias : tensor<18432xbf16>
%77 = torch_c.from_builtin_tensor %transformer_blocks.3.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>

// transformer_blocks.3: attn norm weights and projection biases
%transformer_blocks.3.attn.norm_q.weight = util.global.load @transformer_blocks.3.attn.norm_q.weight : tensor<128xbf16>
%78 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.3.attn.norm_k.weight = util.global.load @transformer_blocks.3.attn.norm_k.weight : tensor<128xbf16>
%79 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.3.attn.to_q.bias = util.global.load @transformer_blocks.3.attn.to_q.bias : tensor<3072xbf16>
%80 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.attn.to_k.bias = util.global.load @transformer_blocks.3.attn.to_k.bias : tensor<3072xbf16>
%81 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.attn.to_v.bias = util.global.load @transformer_blocks.3.attn.to_v.bias : tensor<3072xbf16>
%82 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.attn.add_k_proj.bias = util.global.load @transformer_blocks.3.attn.add_k_proj.bias : tensor<3072xbf16>
%83 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.attn.add_v_proj.bias = util.global.load @transformer_blocks.3.attn.add_v_proj.bias : tensor<3072xbf16>
%84 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.attn.add_q_proj.bias = util.global.load @transformer_blocks.3.attn.add_q_proj.bias : tensor<3072xbf16>
%85 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.attn.to_out.0.bias = util.global.load @transformer_blocks.3.attn.to_out.0.bias : tensor<3072xbf16>
%86 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.attn.to_add_out.bias = util.global.load @transformer_blocks.3.attn.to_add_out.bias : tensor<3072xbf16>
%87 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.attn.norm_added_q.weight = util.global.load @transformer_blocks.3.attn.norm_added_q.weight : tensor<128xbf16>
%88 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.3.attn.norm_added_k.weight = util.global.load @transformer_blocks.3.attn.norm_added_k.weight : tensor<128xbf16>
%89 = torch_c.from_builtin_tensor %transformer_blocks.3.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>

// transformer_blocks.3: first ff bias (the remaining ff biases follow this span)
%transformer_blocks.3.ff.net.0.proj.bias = util.global.load @transformer_blocks.3.ff.net.0.proj.bias : tensor<12288xbf16>
%90 = torch_c.from_builtin_tensor %transformer_blocks.3.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
// Parameter loads (results %91 through %129). Every pair below has the same shape:
//   1. util.global.load reads the module-level global @<name> as a builtin tensor.
//   2. torch_c.from_builtin_tensor converts that value to a !torch.vtensor with
//      the same static shape and bf16 element type, bound to a numbered result.
// SSA names are kept exactly as emitted; any uses of them lie outside this span.

// transformer_blocks.3: remaining ff / ff_context biases
%transformer_blocks.3.ff.net.2.bias = util.global.load @transformer_blocks.3.ff.net.2.bias : tensor<3072xbf16>
%91 = torch_c.from_builtin_tensor %transformer_blocks.3.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.3.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.3.ff_context.net.0.proj.bias : tensor<12288xbf16>
%92 = torch_c.from_builtin_tensor %transformer_blocks.3.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.3.ff_context.net.2.bias = util.global.load @transformer_blocks.3.ff_context.net.2.bias : tensor<3072xbf16>
%93 = torch_c.from_builtin_tensor %transformer_blocks.3.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// transformer_blocks.4: norm1 / norm1_context linear weight and bias
%transformer_blocks.4.norm1.linear.weight = util.global.load @transformer_blocks.4.norm1.linear.weight : tensor<18432x3072xbf16>
%94 = torch_c.from_builtin_tensor %transformer_blocks.4.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.4.norm1.linear.bias = util.global.load @transformer_blocks.4.norm1.linear.bias : tensor<18432xbf16>
%95 = torch_c.from_builtin_tensor %transformer_blocks.4.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>
%transformer_blocks.4.norm1_context.linear.weight = util.global.load @transformer_blocks.4.norm1_context.linear.weight : tensor<18432x3072xbf16>
%96 = torch_c.from_builtin_tensor %transformer_blocks.4.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.4.norm1_context.linear.bias = util.global.load @transformer_blocks.4.norm1_context.linear.bias : tensor<18432xbf16>
%97 = torch_c.from_builtin_tensor %transformer_blocks.4.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>

// transformer_blocks.4: attn norm weights and projection biases
%transformer_blocks.4.attn.norm_q.weight = util.global.load @transformer_blocks.4.attn.norm_q.weight : tensor<128xbf16>
%98 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.4.attn.norm_k.weight = util.global.load @transformer_blocks.4.attn.norm_k.weight : tensor<128xbf16>
%99 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.4.attn.to_q.bias = util.global.load @transformer_blocks.4.attn.to_q.bias : tensor<3072xbf16>
%100 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.attn.to_k.bias = util.global.load @transformer_blocks.4.attn.to_k.bias : tensor<3072xbf16>
%101 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.attn.to_v.bias = util.global.load @transformer_blocks.4.attn.to_v.bias : tensor<3072xbf16>
%102 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.attn.add_k_proj.bias = util.global.load @transformer_blocks.4.attn.add_k_proj.bias : tensor<3072xbf16>
%103 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.attn.add_v_proj.bias = util.global.load @transformer_blocks.4.attn.add_v_proj.bias : tensor<3072xbf16>
%104 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.attn.add_q_proj.bias = util.global.load @transformer_blocks.4.attn.add_q_proj.bias : tensor<3072xbf16>
%105 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.attn.to_out.0.bias = util.global.load @transformer_blocks.4.attn.to_out.0.bias : tensor<3072xbf16>
%106 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.attn.to_add_out.bias = util.global.load @transformer_blocks.4.attn.to_add_out.bias : tensor<3072xbf16>
%107 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.attn.norm_added_q.weight = util.global.load @transformer_blocks.4.attn.norm_added_q.weight : tensor<128xbf16>
%108 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.4.attn.norm_added_k.weight = util.global.load @transformer_blocks.4.attn.norm_added_k.weight : tensor<128xbf16>
%109 = torch_c.from_builtin_tensor %transformer_blocks.4.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>

// transformer_blocks.4: ff / ff_context biases
%transformer_blocks.4.ff.net.0.proj.bias = util.global.load @transformer_blocks.4.ff.net.0.proj.bias : tensor<12288xbf16>
%110 = torch_c.from_builtin_tensor %transformer_blocks.4.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.4.ff.net.2.bias = util.global.load @transformer_blocks.4.ff.net.2.bias : tensor<3072xbf16>
%111 = torch_c.from_builtin_tensor %transformer_blocks.4.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.4.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.4.ff_context.net.0.proj.bias : tensor<12288xbf16>
%112 = torch_c.from_builtin_tensor %transformer_blocks.4.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.4.ff_context.net.2.bias = util.global.load @transformer_blocks.4.ff_context.net.2.bias : tensor<3072xbf16>
%113 = torch_c.from_builtin_tensor %transformer_blocks.4.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// transformer_blocks.5: norm1 / norm1_context linear weight and bias
%transformer_blocks.5.norm1.linear.weight = util.global.load @transformer_blocks.5.norm1.linear.weight : tensor<18432x3072xbf16>
%114 = torch_c.from_builtin_tensor %transformer_blocks.5.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.5.norm1.linear.bias = util.global.load @transformer_blocks.5.norm1.linear.bias : tensor<18432xbf16>
%115 = torch_c.from_builtin_tensor %transformer_blocks.5.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>
%transformer_blocks.5.norm1_context.linear.weight = util.global.load @transformer_blocks.5.norm1_context.linear.weight : tensor<18432x3072xbf16>
%116 = torch_c.from_builtin_tensor %transformer_blocks.5.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.5.norm1_context.linear.bias = util.global.load @transformer_blocks.5.norm1_context.linear.bias : tensor<18432xbf16>
%117 = torch_c.from_builtin_tensor %transformer_blocks.5.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>

// transformer_blocks.5: attn norm weights and projection biases
%transformer_blocks.5.attn.norm_q.weight = util.global.load @transformer_blocks.5.attn.norm_q.weight : tensor<128xbf16>
%118 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.5.attn.norm_k.weight = util.global.load @transformer_blocks.5.attn.norm_k.weight : tensor<128xbf16>
%119 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.5.attn.to_q.bias = util.global.load @transformer_blocks.5.attn.to_q.bias : tensor<3072xbf16>
%120 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.attn.to_k.bias = util.global.load @transformer_blocks.5.attn.to_k.bias : tensor<3072xbf16>
%121 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.attn.to_v.bias = util.global.load @transformer_blocks.5.attn.to_v.bias : tensor<3072xbf16>
%122 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.attn.add_k_proj.bias = util.global.load @transformer_blocks.5.attn.add_k_proj.bias : tensor<3072xbf16>
%123 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.attn.add_v_proj.bias = util.global.load @transformer_blocks.5.attn.add_v_proj.bias : tensor<3072xbf16>
%124 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.attn.add_q_proj.bias = util.global.load @transformer_blocks.5.attn.add_q_proj.bias : tensor<3072xbf16>
%125 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.attn.to_out.0.bias = util.global.load @transformer_blocks.5.attn.to_out.0.bias : tensor<3072xbf16>
%126 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.attn.to_add_out.bias = util.global.load @transformer_blocks.5.attn.to_add_out.bias : tensor<3072xbf16>
%127 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.attn.norm_added_q.weight = util.global.load @transformer_blocks.5.attn.norm_added_q.weight : tensor<128xbf16>
%128 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.5.attn.norm_added_k.weight = util.global.load @transformer_blocks.5.attn.norm_added_k.weight : tensor<128xbf16>
%129 = torch_c.from_builtin_tensor %transformer_blocks.5.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
// Parameter loads (results %130 through %168). Every pair below has the same shape:
//   1. util.global.load reads the module-level global @<name> as a builtin tensor.
//   2. torch_c.from_builtin_tensor converts that value to a !torch.vtensor with
//      the same static shape and bf16 element type, bound to a numbered result.
// SSA names are kept exactly as emitted; any uses of them lie outside this span.

// transformer_blocks.5: ff / ff_context biases
%transformer_blocks.5.ff.net.0.proj.bias = util.global.load @transformer_blocks.5.ff.net.0.proj.bias : tensor<12288xbf16>
%130 = torch_c.from_builtin_tensor %transformer_blocks.5.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.5.ff.net.2.bias = util.global.load @transformer_blocks.5.ff.net.2.bias : tensor<3072xbf16>
%131 = torch_c.from_builtin_tensor %transformer_blocks.5.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.5.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.5.ff_context.net.0.proj.bias : tensor<12288xbf16>
%132 = torch_c.from_builtin_tensor %transformer_blocks.5.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.5.ff_context.net.2.bias = util.global.load @transformer_blocks.5.ff_context.net.2.bias : tensor<3072xbf16>
%133 = torch_c.from_builtin_tensor %transformer_blocks.5.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// transformer_blocks.6: norm1 / norm1_context linear weight and bias
%transformer_blocks.6.norm1.linear.weight = util.global.load @transformer_blocks.6.norm1.linear.weight : tensor<18432x3072xbf16>
%134 = torch_c.from_builtin_tensor %transformer_blocks.6.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.6.norm1.linear.bias = util.global.load @transformer_blocks.6.norm1.linear.bias : tensor<18432xbf16>
%135 = torch_c.from_builtin_tensor %transformer_blocks.6.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>
%transformer_blocks.6.norm1_context.linear.weight = util.global.load @transformer_blocks.6.norm1_context.linear.weight : tensor<18432x3072xbf16>
%136 = torch_c.from_builtin_tensor %transformer_blocks.6.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.6.norm1_context.linear.bias = util.global.load @transformer_blocks.6.norm1_context.linear.bias : tensor<18432xbf16>
%137 = torch_c.from_builtin_tensor %transformer_blocks.6.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>

// transformer_blocks.6: attn norm weights and projection biases
%transformer_blocks.6.attn.norm_q.weight = util.global.load @transformer_blocks.6.attn.norm_q.weight : tensor<128xbf16>
%138 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.6.attn.norm_k.weight = util.global.load @transformer_blocks.6.attn.norm_k.weight : tensor<128xbf16>
%139 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.6.attn.to_q.bias = util.global.load @transformer_blocks.6.attn.to_q.bias : tensor<3072xbf16>
%140 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.attn.to_k.bias = util.global.load @transformer_blocks.6.attn.to_k.bias : tensor<3072xbf16>
%141 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.attn.to_v.bias = util.global.load @transformer_blocks.6.attn.to_v.bias : tensor<3072xbf16>
%142 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.attn.add_k_proj.bias = util.global.load @transformer_blocks.6.attn.add_k_proj.bias : tensor<3072xbf16>
%143 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.attn.add_v_proj.bias = util.global.load @transformer_blocks.6.attn.add_v_proj.bias : tensor<3072xbf16>
%144 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.attn.add_q_proj.bias = util.global.load @transformer_blocks.6.attn.add_q_proj.bias : tensor<3072xbf16>
%145 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.attn.to_out.0.bias = util.global.load @transformer_blocks.6.attn.to_out.0.bias : tensor<3072xbf16>
%146 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.attn.to_add_out.bias = util.global.load @transformer_blocks.6.attn.to_add_out.bias : tensor<3072xbf16>
%147 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.attn.norm_added_q.weight = util.global.load @transformer_blocks.6.attn.norm_added_q.weight : tensor<128xbf16>
%148 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.6.attn.norm_added_k.weight = util.global.load @transformer_blocks.6.attn.norm_added_k.weight : tensor<128xbf16>
%149 = torch_c.from_builtin_tensor %transformer_blocks.6.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>

// transformer_blocks.6: ff / ff_context biases
%transformer_blocks.6.ff.net.0.proj.bias = util.global.load @transformer_blocks.6.ff.net.0.proj.bias : tensor<12288xbf16>
%150 = torch_c.from_builtin_tensor %transformer_blocks.6.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.6.ff.net.2.bias = util.global.load @transformer_blocks.6.ff.net.2.bias : tensor<3072xbf16>
%151 = torch_c.from_builtin_tensor %transformer_blocks.6.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.6.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.6.ff_context.net.0.proj.bias : tensor<12288xbf16>
%152 = torch_c.from_builtin_tensor %transformer_blocks.6.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16>
%transformer_blocks.6.ff_context.net.2.bias = util.global.load @transformer_blocks.6.ff_context.net.2.bias : tensor<3072xbf16>
%153 = torch_c.from_builtin_tensor %transformer_blocks.6.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// transformer_blocks.7: norm1 / norm1_context linear weight and bias
%transformer_blocks.7.norm1.linear.weight = util.global.load @transformer_blocks.7.norm1.linear.weight : tensor<18432x3072xbf16>
%154 = torch_c.from_builtin_tensor %transformer_blocks.7.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.7.norm1.linear.bias = util.global.load @transformer_blocks.7.norm1.linear.bias : tensor<18432xbf16>
%155 = torch_c.from_builtin_tensor %transformer_blocks.7.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>
%transformer_blocks.7.norm1_context.linear.weight = util.global.load @transformer_blocks.7.norm1_context.linear.weight : tensor<18432x3072xbf16>
%156 = torch_c.from_builtin_tensor %transformer_blocks.7.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16>
%transformer_blocks.7.norm1_context.linear.bias = util.global.load @transformer_blocks.7.norm1_context.linear.bias : tensor<18432xbf16>
%157 = torch_c.from_builtin_tensor %transformer_blocks.7.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16>

// transformer_blocks.7: attn norm weights and projection biases
// (norm_added_k and the ff biases for this block follow this span)
%transformer_blocks.7.attn.norm_q.weight = util.global.load @transformer_blocks.7.attn.norm_q.weight : tensor<128xbf16>
%158 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.7.attn.norm_k.weight = util.global.load @transformer_blocks.7.attn.norm_k.weight : tensor<128xbf16>
%159 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.7.attn.to_q.bias = util.global.load @transformer_blocks.7.attn.to_q.bias : tensor<3072xbf16>
%160 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.7.attn.to_k.bias = util.global.load @transformer_blocks.7.attn.to_k.bias : tensor<3072xbf16>
%161 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.7.attn.to_v.bias = util.global.load @transformer_blocks.7.attn.to_v.bias : tensor<3072xbf16>
%162 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.7.attn.add_k_proj.bias = util.global.load @transformer_blocks.7.attn.add_k_proj.bias : tensor<3072xbf16>
%163 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.7.attn.add_v_proj.bias = util.global.load @transformer_blocks.7.attn.add_v_proj.bias : tensor<3072xbf16>
%164 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.7.attn.add_q_proj.bias = util.global.load @transformer_blocks.7.attn.add_q_proj.bias : tensor<3072xbf16>
%165 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.7.attn.to_out.0.bias = util.global.load @transformer_blocks.7.attn.to_out.0.bias : tensor<3072xbf16>
%166 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.7.attn.to_add_out.bias = util.global.load @transformer_blocks.7.attn.to_add_out.bias : tensor<3072xbf16>
%167 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%transformer_blocks.7.attn.norm_added_q.weight = util.global.load @transformer_blocks.7.attn.norm_added_q.weight : tensor<128xbf16>
%168 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16>
%transformer_blocks.7.attn.norm_added_k.weight = util.global.load @transformer_blocks.7.attn.norm_added_k.weight : tensor<128xbf16> %169 = torch_c.from_builtin_tensor %transformer_blocks.7.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.7.ff.net.0.proj.bias = util.global.load @transformer_blocks.7.ff.net.0.proj.bias : tensor<12288xbf16> %170 = torch_c.from_builtin_tensor %transformer_blocks.7.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.7.ff.net.2.bias = util.global.load @transformer_blocks.7.ff.net.2.bias : tensor<3072xbf16> %171 = torch_c.from_builtin_tensor %transformer_blocks.7.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.7.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.7.ff_context.net.0.proj.bias : tensor<12288xbf16> %172 = torch_c.from_builtin_tensor %transformer_blocks.7.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.7.ff_context.net.2.bias = util.global.load @transformer_blocks.7.ff_context.net.2.bias : tensor<3072xbf16> %173 = torch_c.from_builtin_tensor %transformer_blocks.7.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.norm1.linear.weight = util.global.load @transformer_blocks.8.norm1.linear.weight : tensor<18432x3072xbf16> %174 = torch_c.from_builtin_tensor %transformer_blocks.8.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.8.norm1.linear.bias = util.global.load @transformer_blocks.8.norm1.linear.bias : tensor<18432xbf16> %175 = torch_c.from_builtin_tensor %transformer_blocks.8.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.8.norm1_context.linear.weight = util.global.load @transformer_blocks.8.norm1_context.linear.weight : tensor<18432x3072xbf16> %176 = torch_c.from_builtin_tensor 
%transformer_blocks.8.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.8.norm1_context.linear.bias = util.global.load @transformer_blocks.8.norm1_context.linear.bias : tensor<18432xbf16> %177 = torch_c.from_builtin_tensor %transformer_blocks.8.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.8.attn.norm_q.weight = util.global.load @transformer_blocks.8.attn.norm_q.weight : tensor<128xbf16> %178 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.8.attn.norm_k.weight = util.global.load @transformer_blocks.8.attn.norm_k.weight : tensor<128xbf16> %179 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.8.attn.to_q.bias = util.global.load @transformer_blocks.8.attn.to_q.bias : tensor<3072xbf16> %180 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.attn.to_k.bias = util.global.load @transformer_blocks.8.attn.to_k.bias : tensor<3072xbf16> %181 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.attn.to_v.bias = util.global.load @transformer_blocks.8.attn.to_v.bias : tensor<3072xbf16> %182 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.attn.add_k_proj.bias = util.global.load @transformer_blocks.8.attn.add_k_proj.bias : tensor<3072xbf16> %183 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.attn.add_v_proj.bias = util.global.load @transformer_blocks.8.attn.add_v_proj.bias : tensor<3072xbf16> %184 = torch_c.from_builtin_tensor 
%transformer_blocks.8.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.attn.add_q_proj.bias = util.global.load @transformer_blocks.8.attn.add_q_proj.bias : tensor<3072xbf16> %185 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.attn.to_out.0.bias = util.global.load @transformer_blocks.8.attn.to_out.0.bias : tensor<3072xbf16> %186 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.attn.to_add_out.bias = util.global.load @transformer_blocks.8.attn.to_add_out.bias : tensor<3072xbf16> %187 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.attn.norm_added_q.weight = util.global.load @transformer_blocks.8.attn.norm_added_q.weight : tensor<128xbf16> %188 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.8.attn.norm_added_k.weight = util.global.load @transformer_blocks.8.attn.norm_added_k.weight : tensor<128xbf16> %189 = torch_c.from_builtin_tensor %transformer_blocks.8.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.8.ff.net.0.proj.bias = util.global.load @transformer_blocks.8.ff.net.0.proj.bias : tensor<12288xbf16> %190 = torch_c.from_builtin_tensor %transformer_blocks.8.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.8.ff.net.2.bias = util.global.load @transformer_blocks.8.ff.net.2.bias : tensor<3072xbf16> %191 = torch_c.from_builtin_tensor %transformer_blocks.8.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.8.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.8.ff_context.net.0.proj.bias : tensor<12288xbf16> 
%192 = torch_c.from_builtin_tensor %transformer_blocks.8.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.8.ff_context.net.2.bias = util.global.load @transformer_blocks.8.ff_context.net.2.bias : tensor<3072xbf16> %193 = torch_c.from_builtin_tensor %transformer_blocks.8.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.norm1.linear.weight = util.global.load @transformer_blocks.9.norm1.linear.weight : tensor<18432x3072xbf16> %194 = torch_c.from_builtin_tensor %transformer_blocks.9.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.9.norm1.linear.bias = util.global.load @transformer_blocks.9.norm1.linear.bias : tensor<18432xbf16> %195 = torch_c.from_builtin_tensor %transformer_blocks.9.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.9.norm1_context.linear.weight = util.global.load @transformer_blocks.9.norm1_context.linear.weight : tensor<18432x3072xbf16> %196 = torch_c.from_builtin_tensor %transformer_blocks.9.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.9.norm1_context.linear.bias = util.global.load @transformer_blocks.9.norm1_context.linear.bias : tensor<18432xbf16> %197 = torch_c.from_builtin_tensor %transformer_blocks.9.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.9.attn.norm_q.weight = util.global.load @transformer_blocks.9.attn.norm_q.weight : tensor<128xbf16> %198 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.9.attn.norm_k.weight = util.global.load @transformer_blocks.9.attn.norm_k.weight : tensor<128xbf16> %199 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> 
%transformer_blocks.9.attn.to_q.bias = util.global.load @transformer_blocks.9.attn.to_q.bias : tensor<3072xbf16> %200 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.attn.to_k.bias = util.global.load @transformer_blocks.9.attn.to_k.bias : tensor<3072xbf16> %201 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.attn.to_v.bias = util.global.load @transformer_blocks.9.attn.to_v.bias : tensor<3072xbf16> %202 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.attn.add_k_proj.bias = util.global.load @transformer_blocks.9.attn.add_k_proj.bias : tensor<3072xbf16> %203 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.attn.add_v_proj.bias = util.global.load @transformer_blocks.9.attn.add_v_proj.bias : tensor<3072xbf16> %204 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.attn.add_q_proj.bias = util.global.load @transformer_blocks.9.attn.add_q_proj.bias : tensor<3072xbf16> %205 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.attn.to_out.0.bias = util.global.load @transformer_blocks.9.attn.to_out.0.bias : tensor<3072xbf16> %206 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.attn.to_add_out.bias = util.global.load @transformer_blocks.9.attn.to_add_out.bias : tensor<3072xbf16> %207 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%transformer_blocks.9.attn.norm_added_q.weight = util.global.load @transformer_blocks.9.attn.norm_added_q.weight : tensor<128xbf16> %208 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.9.attn.norm_added_k.weight = util.global.load @transformer_blocks.9.attn.norm_added_k.weight : tensor<128xbf16> %209 = torch_c.from_builtin_tensor %transformer_blocks.9.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.9.ff.net.0.proj.bias = util.global.load @transformer_blocks.9.ff.net.0.proj.bias : tensor<12288xbf16> %210 = torch_c.from_builtin_tensor %transformer_blocks.9.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.9.ff.net.2.bias = util.global.load @transformer_blocks.9.ff.net.2.bias : tensor<3072xbf16> %211 = torch_c.from_builtin_tensor %transformer_blocks.9.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.9.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.9.ff_context.net.0.proj.bias : tensor<12288xbf16> %212 = torch_c.from_builtin_tensor %transformer_blocks.9.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.9.ff_context.net.2.bias = util.global.load @transformer_blocks.9.ff_context.net.2.bias : tensor<3072xbf16> %213 = torch_c.from_builtin_tensor %transformer_blocks.9.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.norm1.linear.weight = util.global.load @transformer_blocks.10.norm1.linear.weight : tensor<18432x3072xbf16> %214 = torch_c.from_builtin_tensor %transformer_blocks.10.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.10.norm1.linear.bias = util.global.load @transformer_blocks.10.norm1.linear.bias : tensor<18432xbf16> %215 = torch_c.from_builtin_tensor 
%transformer_blocks.10.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.10.norm1_context.linear.weight = util.global.load @transformer_blocks.10.norm1_context.linear.weight : tensor<18432x3072xbf16> %216 = torch_c.from_builtin_tensor %transformer_blocks.10.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.10.norm1_context.linear.bias = util.global.load @transformer_blocks.10.norm1_context.linear.bias : tensor<18432xbf16> %217 = torch_c.from_builtin_tensor %transformer_blocks.10.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.10.attn.norm_q.weight = util.global.load @transformer_blocks.10.attn.norm_q.weight : tensor<128xbf16> %218 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.10.attn.norm_k.weight = util.global.load @transformer_blocks.10.attn.norm_k.weight : tensor<128xbf16> %219 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.10.attn.to_q.bias = util.global.load @transformer_blocks.10.attn.to_q.bias : tensor<3072xbf16> %220 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.attn.to_k.bias = util.global.load @transformer_blocks.10.attn.to_k.bias : tensor<3072xbf16> %221 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.attn.to_v.bias = util.global.load @transformer_blocks.10.attn.to_v.bias : tensor<3072xbf16> %222 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.attn.add_k_proj.bias = util.global.load @transformer_blocks.10.attn.add_k_proj.bias : 
tensor<3072xbf16> %223 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.attn.add_v_proj.bias = util.global.load @transformer_blocks.10.attn.add_v_proj.bias : tensor<3072xbf16> %224 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.attn.add_q_proj.bias = util.global.load @transformer_blocks.10.attn.add_q_proj.bias : tensor<3072xbf16> %225 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.attn.to_out.0.bias = util.global.load @transformer_blocks.10.attn.to_out.0.bias : tensor<3072xbf16> %226 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.attn.to_add_out.bias = util.global.load @transformer_blocks.10.attn.to_add_out.bias : tensor<3072xbf16> %227 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.attn.norm_added_q.weight = util.global.load @transformer_blocks.10.attn.norm_added_q.weight : tensor<128xbf16> %228 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.10.attn.norm_added_k.weight = util.global.load @transformer_blocks.10.attn.norm_added_k.weight : tensor<128xbf16> %229 = torch_c.from_builtin_tensor %transformer_blocks.10.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.10.ff.net.0.proj.bias = util.global.load @transformer_blocks.10.ff.net.0.proj.bias : tensor<12288xbf16> %230 = torch_c.from_builtin_tensor %transformer_blocks.10.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.10.ff.net.2.bias = 
util.global.load @transformer_blocks.10.ff.net.2.bias : tensor<3072xbf16> %231 = torch_c.from_builtin_tensor %transformer_blocks.10.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.10.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.10.ff_context.net.0.proj.bias : tensor<12288xbf16> %232 = torch_c.from_builtin_tensor %transformer_blocks.10.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.10.ff_context.net.2.bias = util.global.load @transformer_blocks.10.ff_context.net.2.bias : tensor<3072xbf16> %233 = torch_c.from_builtin_tensor %transformer_blocks.10.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.norm1.linear.weight = util.global.load @transformer_blocks.11.norm1.linear.weight : tensor<18432x3072xbf16> %234 = torch_c.from_builtin_tensor %transformer_blocks.11.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.11.norm1.linear.bias = util.global.load @transformer_blocks.11.norm1.linear.bias : tensor<18432xbf16> %235 = torch_c.from_builtin_tensor %transformer_blocks.11.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.11.norm1_context.linear.weight = util.global.load @transformer_blocks.11.norm1_context.linear.weight : tensor<18432x3072xbf16> %236 = torch_c.from_builtin_tensor %transformer_blocks.11.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.11.norm1_context.linear.bias = util.global.load @transformer_blocks.11.norm1_context.linear.bias : tensor<18432xbf16> %237 = torch_c.from_builtin_tensor %transformer_blocks.11.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.11.attn.norm_q.weight = util.global.load @transformer_blocks.11.attn.norm_q.weight : tensor<128xbf16> %238 = torch_c.from_builtin_tensor 
%transformer_blocks.11.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.11.attn.norm_k.weight = util.global.load @transformer_blocks.11.attn.norm_k.weight : tensor<128xbf16> %239 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.11.attn.to_q.bias = util.global.load @transformer_blocks.11.attn.to_q.bias : tensor<3072xbf16> %240 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.attn.to_k.bias = util.global.load @transformer_blocks.11.attn.to_k.bias : tensor<3072xbf16> %241 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.attn.to_v.bias = util.global.load @transformer_blocks.11.attn.to_v.bias : tensor<3072xbf16> %242 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.attn.add_k_proj.bias = util.global.load @transformer_blocks.11.attn.add_k_proj.bias : tensor<3072xbf16> %243 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.attn.add_v_proj.bias = util.global.load @transformer_blocks.11.attn.add_v_proj.bias : tensor<3072xbf16> %244 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.attn.add_q_proj.bias = util.global.load @transformer_blocks.11.attn.add_q_proj.bias : tensor<3072xbf16> %245 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.attn.to_out.0.bias = util.global.load @transformer_blocks.11.attn.to_out.0.bias : tensor<3072xbf16> %246 = torch_c.from_builtin_tensor 
%transformer_blocks.11.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.attn.to_add_out.bias = util.global.load @transformer_blocks.11.attn.to_add_out.bias : tensor<3072xbf16> %247 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.attn.norm_added_q.weight = util.global.load @transformer_blocks.11.attn.norm_added_q.weight : tensor<128xbf16> %248 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.11.attn.norm_added_k.weight = util.global.load @transformer_blocks.11.attn.norm_added_k.weight : tensor<128xbf16> %249 = torch_c.from_builtin_tensor %transformer_blocks.11.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.11.ff.net.0.proj.bias = util.global.load @transformer_blocks.11.ff.net.0.proj.bias : tensor<12288xbf16> %250 = torch_c.from_builtin_tensor %transformer_blocks.11.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.11.ff.net.2.bias = util.global.load @transformer_blocks.11.ff.net.2.bias : tensor<3072xbf16> %251 = torch_c.from_builtin_tensor %transformer_blocks.11.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.11.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.11.ff_context.net.0.proj.bias : tensor<12288xbf16> %252 = torch_c.from_builtin_tensor %transformer_blocks.11.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.11.ff_context.net.2.bias = util.global.load @transformer_blocks.11.ff_context.net.2.bias : tensor<3072xbf16> %253 = torch_c.from_builtin_tensor %transformer_blocks.11.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.norm1.linear.weight = util.global.load 
@transformer_blocks.12.norm1.linear.weight : tensor<18432x3072xbf16> %254 = torch_c.from_builtin_tensor %transformer_blocks.12.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.12.norm1.linear.bias = util.global.load @transformer_blocks.12.norm1.linear.bias : tensor<18432xbf16> %255 = torch_c.from_builtin_tensor %transformer_blocks.12.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.12.norm1_context.linear.weight = util.global.load @transformer_blocks.12.norm1_context.linear.weight : tensor<18432x3072xbf16> %256 = torch_c.from_builtin_tensor %transformer_blocks.12.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.12.norm1_context.linear.bias = util.global.load @transformer_blocks.12.norm1_context.linear.bias : tensor<18432xbf16> %257 = torch_c.from_builtin_tensor %transformer_blocks.12.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.12.attn.norm_q.weight = util.global.load @transformer_blocks.12.attn.norm_q.weight : tensor<128xbf16> %258 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.12.attn.norm_k.weight = util.global.load @transformer_blocks.12.attn.norm_k.weight : tensor<128xbf16> %259 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.12.attn.to_q.bias = util.global.load @transformer_blocks.12.attn.to_q.bias : tensor<3072xbf16> %260 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.attn.to_k.bias = util.global.load @transformer_blocks.12.attn.to_k.bias : tensor<3072xbf16> %261 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.to_k.bias : tensor<3072xbf16> -> 
!torch.vtensor<[3072],bf16> %transformer_blocks.12.attn.to_v.bias = util.global.load @transformer_blocks.12.attn.to_v.bias : tensor<3072xbf16> %262 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.attn.add_k_proj.bias = util.global.load @transformer_blocks.12.attn.add_k_proj.bias : tensor<3072xbf16> %263 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.attn.add_v_proj.bias = util.global.load @transformer_blocks.12.attn.add_v_proj.bias : tensor<3072xbf16> %264 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.attn.add_q_proj.bias = util.global.load @transformer_blocks.12.attn.add_q_proj.bias : tensor<3072xbf16> %265 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.attn.to_out.0.bias = util.global.load @transformer_blocks.12.attn.to_out.0.bias : tensor<3072xbf16> %266 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.attn.to_add_out.bias = util.global.load @transformer_blocks.12.attn.to_add_out.bias : tensor<3072xbf16> %267 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.attn.norm_added_q.weight = util.global.load @transformer_blocks.12.attn.norm_added_q.weight : tensor<128xbf16> %268 = torch_c.from_builtin_tensor %transformer_blocks.12.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.12.attn.norm_added_k.weight = util.global.load @transformer_blocks.12.attn.norm_added_k.weight : tensor<128xbf16> %269 = torch_c.from_builtin_tensor 
%transformer_blocks.12.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.12.ff.net.0.proj.bias = util.global.load @transformer_blocks.12.ff.net.0.proj.bias : tensor<12288xbf16> %270 = torch_c.from_builtin_tensor %transformer_blocks.12.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.12.ff.net.2.bias = util.global.load @transformer_blocks.12.ff.net.2.bias : tensor<3072xbf16> %271 = torch_c.from_builtin_tensor %transformer_blocks.12.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.12.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.12.ff_context.net.0.proj.bias : tensor<12288xbf16> %272 = torch_c.from_builtin_tensor %transformer_blocks.12.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.12.ff_context.net.2.bias = util.global.load @transformer_blocks.12.ff_context.net.2.bias : tensor<3072xbf16> %273 = torch_c.from_builtin_tensor %transformer_blocks.12.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.norm1.linear.weight = util.global.load @transformer_blocks.13.norm1.linear.weight : tensor<18432x3072xbf16> %274 = torch_c.from_builtin_tensor %transformer_blocks.13.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.13.norm1.linear.bias = util.global.load @transformer_blocks.13.norm1.linear.bias : tensor<18432xbf16> %275 = torch_c.from_builtin_tensor %transformer_blocks.13.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.13.norm1_context.linear.weight = util.global.load @transformer_blocks.13.norm1_context.linear.weight : tensor<18432x3072xbf16> %276 = torch_c.from_builtin_tensor %transformer_blocks.13.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.13.norm1_context.linear.bias = 
util.global.load @transformer_blocks.13.norm1_context.linear.bias : tensor<18432xbf16> %277 = torch_c.from_builtin_tensor %transformer_blocks.13.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.13.attn.norm_q.weight = util.global.load @transformer_blocks.13.attn.norm_q.weight : tensor<128xbf16> %278 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.13.attn.norm_k.weight = util.global.load @transformer_blocks.13.attn.norm_k.weight : tensor<128xbf16> %279 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.13.attn.to_q.bias = util.global.load @transformer_blocks.13.attn.to_q.bias : tensor<3072xbf16> %280 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.attn.to_k.bias = util.global.load @transformer_blocks.13.attn.to_k.bias : tensor<3072xbf16> %281 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.attn.to_v.bias = util.global.load @transformer_blocks.13.attn.to_v.bias : tensor<3072xbf16> %282 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.attn.add_k_proj.bias = util.global.load @transformer_blocks.13.attn.add_k_proj.bias : tensor<3072xbf16> %283 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.attn.add_v_proj.bias = util.global.load @transformer_blocks.13.attn.add_v_proj.bias : tensor<3072xbf16> %284 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.attn.add_q_proj.bias = 
util.global.load @transformer_blocks.13.attn.add_q_proj.bias : tensor<3072xbf16> %285 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.attn.to_out.0.bias = util.global.load @transformer_blocks.13.attn.to_out.0.bias : tensor<3072xbf16> %286 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.attn.to_add_out.bias = util.global.load @transformer_blocks.13.attn.to_add_out.bias : tensor<3072xbf16> %287 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.attn.norm_added_q.weight = util.global.load @transformer_blocks.13.attn.norm_added_q.weight : tensor<128xbf16> %288 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.13.attn.norm_added_k.weight = util.global.load @transformer_blocks.13.attn.norm_added_k.weight : tensor<128xbf16> %289 = torch_c.from_builtin_tensor %transformer_blocks.13.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.13.ff.net.0.proj.bias = util.global.load @transformer_blocks.13.ff.net.0.proj.bias : tensor<12288xbf16> %290 = torch_c.from_builtin_tensor %transformer_blocks.13.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.13.ff.net.2.bias = util.global.load @transformer_blocks.13.ff.net.2.bias : tensor<3072xbf16> %291 = torch_c.from_builtin_tensor %transformer_blocks.13.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.13.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.13.ff_context.net.0.proj.bias : tensor<12288xbf16> %292 = torch_c.from_builtin_tensor %transformer_blocks.13.ff_context.net.0.proj.bias : tensor<12288xbf16> -> 
!torch.vtensor<[12288],bf16> %transformer_blocks.13.ff_context.net.2.bias = util.global.load @transformer_blocks.13.ff_context.net.2.bias : tensor<3072xbf16> %293 = torch_c.from_builtin_tensor %transformer_blocks.13.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.norm1.linear.weight = util.global.load @transformer_blocks.14.norm1.linear.weight : tensor<18432x3072xbf16> %294 = torch_c.from_builtin_tensor %transformer_blocks.14.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.14.norm1.linear.bias = util.global.load @transformer_blocks.14.norm1.linear.bias : tensor<18432xbf16> %295 = torch_c.from_builtin_tensor %transformer_blocks.14.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.14.norm1_context.linear.weight = util.global.load @transformer_blocks.14.norm1_context.linear.weight : tensor<18432x3072xbf16> %296 = torch_c.from_builtin_tensor %transformer_blocks.14.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.14.norm1_context.linear.bias = util.global.load @transformer_blocks.14.norm1_context.linear.bias : tensor<18432xbf16> %297 = torch_c.from_builtin_tensor %transformer_blocks.14.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.14.attn.norm_q.weight = util.global.load @transformer_blocks.14.attn.norm_q.weight : tensor<128xbf16> %298 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.14.attn.norm_k.weight = util.global.load @transformer_blocks.14.attn.norm_k.weight : tensor<128xbf16> %299 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.14.attn.to_q.bias = util.global.load @transformer_blocks.14.attn.to_q.bias : tensor<3072xbf16> %300 
= torch_c.from_builtin_tensor %transformer_blocks.14.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.attn.to_k.bias = util.global.load @transformer_blocks.14.attn.to_k.bias : tensor<3072xbf16> %301 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.attn.to_v.bias = util.global.load @transformer_blocks.14.attn.to_v.bias : tensor<3072xbf16> %302 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.attn.add_k_proj.bias = util.global.load @transformer_blocks.14.attn.add_k_proj.bias : tensor<3072xbf16> %303 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.attn.add_v_proj.bias = util.global.load @transformer_blocks.14.attn.add_v_proj.bias : tensor<3072xbf16> %304 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.attn.add_q_proj.bias = util.global.load @transformer_blocks.14.attn.add_q_proj.bias : tensor<3072xbf16> %305 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.attn.to_out.0.bias = util.global.load @transformer_blocks.14.attn.to_out.0.bias : tensor<3072xbf16> %306 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.attn.to_add_out.bias = util.global.load @transformer_blocks.14.attn.to_add_out.bias : tensor<3072xbf16> %307 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.attn.norm_added_q.weight = util.global.load @transformer_blocks.14.attn.norm_added_q.weight : 
tensor<128xbf16> %308 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.14.attn.norm_added_k.weight = util.global.load @transformer_blocks.14.attn.norm_added_k.weight : tensor<128xbf16> %309 = torch_c.from_builtin_tensor %transformer_blocks.14.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.14.ff.net.0.proj.bias = util.global.load @transformer_blocks.14.ff.net.0.proj.bias : tensor<12288xbf16> %310 = torch_c.from_builtin_tensor %transformer_blocks.14.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.14.ff.net.2.bias = util.global.load @transformer_blocks.14.ff.net.2.bias : tensor<3072xbf16> %311 = torch_c.from_builtin_tensor %transformer_blocks.14.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.14.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.14.ff_context.net.0.proj.bias : tensor<12288xbf16> %312 = torch_c.from_builtin_tensor %transformer_blocks.14.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.14.ff_context.net.2.bias = util.global.load @transformer_blocks.14.ff_context.net.2.bias : tensor<3072xbf16> %313 = torch_c.from_builtin_tensor %transformer_blocks.14.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.norm1.linear.weight = util.global.load @transformer_blocks.15.norm1.linear.weight : tensor<18432x3072xbf16> %314 = torch_c.from_builtin_tensor %transformer_blocks.15.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.15.norm1.linear.bias = util.global.load @transformer_blocks.15.norm1.linear.bias : tensor<18432xbf16> %315 = torch_c.from_builtin_tensor %transformer_blocks.15.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> 
%transformer_blocks.15.norm1_context.linear.weight = util.global.load @transformer_blocks.15.norm1_context.linear.weight : tensor<18432x3072xbf16> %316 = torch_c.from_builtin_tensor %transformer_blocks.15.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.15.norm1_context.linear.bias = util.global.load @transformer_blocks.15.norm1_context.linear.bias : tensor<18432xbf16> %317 = torch_c.from_builtin_tensor %transformer_blocks.15.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.15.attn.norm_q.weight = util.global.load @transformer_blocks.15.attn.norm_q.weight : tensor<128xbf16> %318 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.15.attn.norm_k.weight = util.global.load @transformer_blocks.15.attn.norm_k.weight : tensor<128xbf16> %319 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.15.attn.to_q.bias = util.global.load @transformer_blocks.15.attn.to_q.bias : tensor<3072xbf16> %320 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.attn.to_k.bias = util.global.load @transformer_blocks.15.attn.to_k.bias : tensor<3072xbf16> %321 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.attn.to_v.bias = util.global.load @transformer_blocks.15.attn.to_v.bias : tensor<3072xbf16> %322 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.attn.add_k_proj.bias = util.global.load @transformer_blocks.15.attn.add_k_proj.bias : tensor<3072xbf16> %323 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.add_k_proj.bias : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.attn.add_v_proj.bias = util.global.load @transformer_blocks.15.attn.add_v_proj.bias : tensor<3072xbf16> %324 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.attn.add_q_proj.bias = util.global.load @transformer_blocks.15.attn.add_q_proj.bias : tensor<3072xbf16> %325 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.attn.to_out.0.bias = util.global.load @transformer_blocks.15.attn.to_out.0.bias : tensor<3072xbf16> %326 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.attn.to_add_out.bias = util.global.load @transformer_blocks.15.attn.to_add_out.bias : tensor<3072xbf16> %327 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.attn.norm_added_q.weight = util.global.load @transformer_blocks.15.attn.norm_added_q.weight : tensor<128xbf16> %328 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.15.attn.norm_added_k.weight = util.global.load @transformer_blocks.15.attn.norm_added_k.weight : tensor<128xbf16> %329 = torch_c.from_builtin_tensor %transformer_blocks.15.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.15.ff.net.0.proj.bias = util.global.load @transformer_blocks.15.ff.net.0.proj.bias : tensor<12288xbf16> %330 = torch_c.from_builtin_tensor %transformer_blocks.15.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.15.ff.net.2.bias = util.global.load @transformer_blocks.15.ff.net.2.bias : tensor<3072xbf16> %331 = 
torch_c.from_builtin_tensor %transformer_blocks.15.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.15.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.15.ff_context.net.0.proj.bias : tensor<12288xbf16> %332 = torch_c.from_builtin_tensor %transformer_blocks.15.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.15.ff_context.net.2.bias = util.global.load @transformer_blocks.15.ff_context.net.2.bias : tensor<3072xbf16> %333 = torch_c.from_builtin_tensor %transformer_blocks.15.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.norm1.linear.weight = util.global.load @transformer_blocks.16.norm1.linear.weight : tensor<18432x3072xbf16> %334 = torch_c.from_builtin_tensor %transformer_blocks.16.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.16.norm1.linear.bias = util.global.load @transformer_blocks.16.norm1.linear.bias : tensor<18432xbf16> %335 = torch_c.from_builtin_tensor %transformer_blocks.16.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.16.norm1_context.linear.weight = util.global.load @transformer_blocks.16.norm1_context.linear.weight : tensor<18432x3072xbf16> %336 = torch_c.from_builtin_tensor %transformer_blocks.16.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.16.norm1_context.linear.bias = util.global.load @transformer_blocks.16.norm1_context.linear.bias : tensor<18432xbf16> %337 = torch_c.from_builtin_tensor %transformer_blocks.16.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.16.attn.norm_q.weight = util.global.load @transformer_blocks.16.attn.norm_q.weight : tensor<128xbf16> %338 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> 
%transformer_blocks.16.attn.norm_k.weight = util.global.load @transformer_blocks.16.attn.norm_k.weight : tensor<128xbf16> %339 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.16.attn.to_q.bias = util.global.load @transformer_blocks.16.attn.to_q.bias : tensor<3072xbf16> %340 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.attn.to_k.bias = util.global.load @transformer_blocks.16.attn.to_k.bias : tensor<3072xbf16> %341 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.attn.to_v.bias = util.global.load @transformer_blocks.16.attn.to_v.bias : tensor<3072xbf16> %342 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.attn.add_k_proj.bias = util.global.load @transformer_blocks.16.attn.add_k_proj.bias : tensor<3072xbf16> %343 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.attn.add_v_proj.bias = util.global.load @transformer_blocks.16.attn.add_v_proj.bias : tensor<3072xbf16> %344 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.attn.add_q_proj.bias = util.global.load @transformer_blocks.16.attn.add_q_proj.bias : tensor<3072xbf16> %345 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.attn.to_out.0.bias = util.global.load @transformer_blocks.16.attn.to_out.0.bias : tensor<3072xbf16> %346 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%transformer_blocks.16.attn.to_add_out.bias = util.global.load @transformer_blocks.16.attn.to_add_out.bias : tensor<3072xbf16> %347 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.attn.norm_added_q.weight = util.global.load @transformer_blocks.16.attn.norm_added_q.weight : tensor<128xbf16> %348 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.16.attn.norm_added_k.weight = util.global.load @transformer_blocks.16.attn.norm_added_k.weight : tensor<128xbf16> %349 = torch_c.from_builtin_tensor %transformer_blocks.16.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.16.ff.net.0.proj.bias = util.global.load @transformer_blocks.16.ff.net.0.proj.bias : tensor<12288xbf16> %350 = torch_c.from_builtin_tensor %transformer_blocks.16.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.16.ff.net.2.bias = util.global.load @transformer_blocks.16.ff.net.2.bias : tensor<3072xbf16> %351 = torch_c.from_builtin_tensor %transformer_blocks.16.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.16.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.16.ff_context.net.0.proj.bias : tensor<12288xbf16> %352 = torch_c.from_builtin_tensor %transformer_blocks.16.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.16.ff_context.net.2.bias = util.global.load @transformer_blocks.16.ff_context.net.2.bias : tensor<3072xbf16> %353 = torch_c.from_builtin_tensor %transformer_blocks.16.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.norm1.linear.weight = util.global.load @transformer_blocks.17.norm1.linear.weight : tensor<18432x3072xbf16> %354 = torch_c.from_builtin_tensor 
%transformer_blocks.17.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.17.norm1.linear.bias = util.global.load @transformer_blocks.17.norm1.linear.bias : tensor<18432xbf16> %355 = torch_c.from_builtin_tensor %transformer_blocks.17.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.17.norm1_context.linear.weight = util.global.load @transformer_blocks.17.norm1_context.linear.weight : tensor<18432x3072xbf16> %356 = torch_c.from_builtin_tensor %transformer_blocks.17.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.17.norm1_context.linear.bias = util.global.load @transformer_blocks.17.norm1_context.linear.bias : tensor<18432xbf16> %357 = torch_c.from_builtin_tensor %transformer_blocks.17.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.17.attn.norm_q.weight = util.global.load @transformer_blocks.17.attn.norm_q.weight : tensor<128xbf16> %358 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.17.attn.norm_k.weight = util.global.load @transformer_blocks.17.attn.norm_k.weight : tensor<128xbf16> %359 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.17.attn.to_q.bias = util.global.load @transformer_blocks.17.attn.to_q.bias : tensor<3072xbf16> %360 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.attn.to_k.bias = util.global.load @transformer_blocks.17.attn.to_k.bias : tensor<3072xbf16> %361 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.attn.to_v.bias = util.global.load @transformer_blocks.17.attn.to_v.bias : 
tensor<3072xbf16> %362 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.attn.add_k_proj.bias = util.global.load @transformer_blocks.17.attn.add_k_proj.bias : tensor<3072xbf16> %363 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.attn.add_v_proj.bias = util.global.load @transformer_blocks.17.attn.add_v_proj.bias : tensor<3072xbf16> %364 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.attn.add_q_proj.bias = util.global.load @transformer_blocks.17.attn.add_q_proj.bias : tensor<3072xbf16> %365 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.attn.to_out.0.bias = util.global.load @transformer_blocks.17.attn.to_out.0.bias : tensor<3072xbf16> %366 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.attn.to_add_out.bias = util.global.load @transformer_blocks.17.attn.to_add_out.bias : tensor<3072xbf16> %367 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.attn.norm_added_q.weight = util.global.load @transformer_blocks.17.attn.norm_added_q.weight : tensor<128xbf16> %368 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.17.attn.norm_added_k.weight = util.global.load @transformer_blocks.17.attn.norm_added_k.weight : tensor<128xbf16> %369 = torch_c.from_builtin_tensor %transformer_blocks.17.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.17.ff.net.0.proj.bias = 
util.global.load @transformer_blocks.17.ff.net.0.proj.bias : tensor<12288xbf16> %370 = torch_c.from_builtin_tensor %transformer_blocks.17.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.17.ff.net.2.bias = util.global.load @transformer_blocks.17.ff.net.2.bias : tensor<3072xbf16> %371 = torch_c.from_builtin_tensor %transformer_blocks.17.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.17.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.17.ff_context.net.0.proj.bias : tensor<12288xbf16> %372 = torch_c.from_builtin_tensor %transformer_blocks.17.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.17.ff_context.net.2.bias = util.global.load @transformer_blocks.17.ff_context.net.2.bias : tensor<3072xbf16> %373 = torch_c.from_builtin_tensor %transformer_blocks.17.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.norm1.linear.weight = util.global.load @transformer_blocks.18.norm1.linear.weight : tensor<18432x3072xbf16> %374 = torch_c.from_builtin_tensor %transformer_blocks.18.norm1.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.18.norm1.linear.bias = util.global.load @transformer_blocks.18.norm1.linear.bias : tensor<18432xbf16> %375 = torch_c.from_builtin_tensor %transformer_blocks.18.norm1.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.18.norm1_context.linear.weight = util.global.load @transformer_blocks.18.norm1_context.linear.weight : tensor<18432x3072xbf16> %376 = torch_c.from_builtin_tensor %transformer_blocks.18.norm1_context.linear.weight : tensor<18432x3072xbf16> -> !torch.vtensor<[18432,3072],bf16> %transformer_blocks.18.norm1_context.linear.bias = util.global.load @transformer_blocks.18.norm1_context.linear.bias : tensor<18432xbf16> %377 = torch_c.from_builtin_tensor 
%transformer_blocks.18.norm1_context.linear.bias : tensor<18432xbf16> -> !torch.vtensor<[18432],bf16> %transformer_blocks.18.attn.norm_q.weight = util.global.load @transformer_blocks.18.attn.norm_q.weight : tensor<128xbf16> %378 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.18.attn.norm_k.weight = util.global.load @transformer_blocks.18.attn.norm_k.weight : tensor<128xbf16> %379 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.18.attn.to_q.bias = util.global.load @transformer_blocks.18.attn.to_q.bias : tensor<3072xbf16> %380 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.attn.to_k.bias = util.global.load @transformer_blocks.18.attn.to_k.bias : tensor<3072xbf16> %381 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.attn.to_v.bias = util.global.load @transformer_blocks.18.attn.to_v.bias : tensor<3072xbf16> %382 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.attn.add_k_proj.bias = util.global.load @transformer_blocks.18.attn.add_k_proj.bias : tensor<3072xbf16> %383 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.add_k_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.attn.add_v_proj.bias = util.global.load @transformer_blocks.18.attn.add_v_proj.bias : tensor<3072xbf16> %384 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.add_v_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.attn.add_q_proj.bias = util.global.load @transformer_blocks.18.attn.add_q_proj.bias : tensor<3072xbf16> %385 = torch_c.from_builtin_tensor 
%transformer_blocks.18.attn.add_q_proj.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.attn.to_out.0.bias = util.global.load @transformer_blocks.18.attn.to_out.0.bias : tensor<3072xbf16> %386 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.to_out.0.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.attn.to_add_out.bias = util.global.load @transformer_blocks.18.attn.to_add_out.bias : tensor<3072xbf16> %387 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.to_add_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.attn.norm_added_q.weight = util.global.load @transformer_blocks.18.attn.norm_added_q.weight : tensor<128xbf16> %388 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.norm_added_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.18.attn.norm_added_k.weight = util.global.load @transformer_blocks.18.attn.norm_added_k.weight : tensor<128xbf16> %389 = torch_c.from_builtin_tensor %transformer_blocks.18.attn.norm_added_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %transformer_blocks.18.ff.net.0.proj.bias = util.global.load @transformer_blocks.18.ff.net.0.proj.bias : tensor<12288xbf16> %390 = torch_c.from_builtin_tensor %transformer_blocks.18.ff.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.18.ff.net.2.bias = util.global.load @transformer_blocks.18.ff.net.2.bias : tensor<3072xbf16> %391 = torch_c.from_builtin_tensor %transformer_blocks.18.ff.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %transformer_blocks.18.ff_context.net.0.proj.bias = util.global.load @transformer_blocks.18.ff_context.net.0.proj.bias : tensor<12288xbf16> %392 = torch_c.from_builtin_tensor %transformer_blocks.18.ff_context.net.0.proj.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %transformer_blocks.18.ff_context.net.2.bias = util.global.load 
@transformer_blocks.18.ff_context.net.2.bias : tensor<3072xbf16> %393 = torch_c.from_builtin_tensor %transformer_blocks.18.ff_context.net.2.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.0.norm.linear.weight = util.global.load @single_transformer_blocks.0.norm.linear.weight : tensor<9216x3072xbf16> %394 = torch_c.from_builtin_tensor %single_transformer_blocks.0.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.0.norm.linear.bias = util.global.load @single_transformer_blocks.0.norm.linear.bias : tensor<9216xbf16> %395 = torch_c.from_builtin_tensor %single_transformer_blocks.0.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.0.proj_mlp.bias = util.global.load @single_transformer_blocks.0.proj_mlp.bias : tensor<12288xbf16> %396 = torch_c.from_builtin_tensor %single_transformer_blocks.0.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.0.proj_out.bias = util.global.load @single_transformer_blocks.0.proj_out.bias : tensor<3072xbf16> %397 = torch_c.from_builtin_tensor %single_transformer_blocks.0.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.0.attn.norm_q.weight = util.global.load @single_transformer_blocks.0.attn.norm_q.weight : tensor<128xbf16> %398 = torch_c.from_builtin_tensor %single_transformer_blocks.0.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.0.attn.norm_k.weight = util.global.load @single_transformer_blocks.0.attn.norm_k.weight : tensor<128xbf16> %399 = torch_c.from_builtin_tensor %single_transformer_blocks.0.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.0.attn.to_q.bias = util.global.load @single_transformer_blocks.0.attn.to_q.bias : tensor<3072xbf16> %400 = torch_c.from_builtin_tensor %single_transformer_blocks.0.attn.to_q.bias : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.0.attn.to_k.bias = util.global.load @single_transformer_blocks.0.attn.to_k.bias : tensor<3072xbf16> %401 = torch_c.from_builtin_tensor %single_transformer_blocks.0.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.0.attn.to_v.bias = util.global.load @single_transformer_blocks.0.attn.to_v.bias : tensor<3072xbf16> %402 = torch_c.from_builtin_tensor %single_transformer_blocks.0.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.1.norm.linear.weight = util.global.load @single_transformer_blocks.1.norm.linear.weight : tensor<9216x3072xbf16> %403 = torch_c.from_builtin_tensor %single_transformer_blocks.1.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.1.norm.linear.bias = util.global.load @single_transformer_blocks.1.norm.linear.bias : tensor<9216xbf16> %404 = torch_c.from_builtin_tensor %single_transformer_blocks.1.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.1.proj_mlp.bias = util.global.load @single_transformer_blocks.1.proj_mlp.bias : tensor<12288xbf16> %405 = torch_c.from_builtin_tensor %single_transformer_blocks.1.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.1.proj_out.bias = util.global.load @single_transformer_blocks.1.proj_out.bias : tensor<3072xbf16> %406 = torch_c.from_builtin_tensor %single_transformer_blocks.1.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.1.attn.norm_q.weight = util.global.load @single_transformer_blocks.1.attn.norm_q.weight : tensor<128xbf16> %407 = torch_c.from_builtin_tensor %single_transformer_blocks.1.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.1.attn.norm_k.weight = util.global.load 
@single_transformer_blocks.1.attn.norm_k.weight : tensor<128xbf16> %408 = torch_c.from_builtin_tensor %single_transformer_blocks.1.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.1.attn.to_q.bias = util.global.load @single_transformer_blocks.1.attn.to_q.bias : tensor<3072xbf16> %409 = torch_c.from_builtin_tensor %single_transformer_blocks.1.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.1.attn.to_k.bias = util.global.load @single_transformer_blocks.1.attn.to_k.bias : tensor<3072xbf16> %410 = torch_c.from_builtin_tensor %single_transformer_blocks.1.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.1.attn.to_v.bias = util.global.load @single_transformer_blocks.1.attn.to_v.bias : tensor<3072xbf16> %411 = torch_c.from_builtin_tensor %single_transformer_blocks.1.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.2.norm.linear.weight = util.global.load @single_transformer_blocks.2.norm.linear.weight : tensor<9216x3072xbf16> %412 = torch_c.from_builtin_tensor %single_transformer_blocks.2.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.2.norm.linear.bias = util.global.load @single_transformer_blocks.2.norm.linear.bias : tensor<9216xbf16> %413 = torch_c.from_builtin_tensor %single_transformer_blocks.2.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.2.proj_mlp.bias = util.global.load @single_transformer_blocks.2.proj_mlp.bias : tensor<12288xbf16> %414 = torch_c.from_builtin_tensor %single_transformer_blocks.2.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.2.proj_out.bias = util.global.load @single_transformer_blocks.2.proj_out.bias : tensor<3072xbf16> %415 = torch_c.from_builtin_tensor %single_transformer_blocks.2.proj_out.bias : tensor<3072xbf16> -> 
!torch.vtensor<[3072],bf16> %single_transformer_blocks.2.attn.norm_q.weight = util.global.load @single_transformer_blocks.2.attn.norm_q.weight : tensor<128xbf16> %416 = torch_c.from_builtin_tensor %single_transformer_blocks.2.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.2.attn.norm_k.weight = util.global.load @single_transformer_blocks.2.attn.norm_k.weight : tensor<128xbf16> %417 = torch_c.from_builtin_tensor %single_transformer_blocks.2.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.2.attn.to_q.bias = util.global.load @single_transformer_blocks.2.attn.to_q.bias : tensor<3072xbf16> %418 = torch_c.from_builtin_tensor %single_transformer_blocks.2.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.2.attn.to_k.bias = util.global.load @single_transformer_blocks.2.attn.to_k.bias : tensor<3072xbf16> %419 = torch_c.from_builtin_tensor %single_transformer_blocks.2.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.2.attn.to_v.bias = util.global.load @single_transformer_blocks.2.attn.to_v.bias : tensor<3072xbf16> %420 = torch_c.from_builtin_tensor %single_transformer_blocks.2.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.3.norm.linear.weight = util.global.load @single_transformer_blocks.3.norm.linear.weight : tensor<9216x3072xbf16> %421 = torch_c.from_builtin_tensor %single_transformer_blocks.3.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.3.norm.linear.bias = util.global.load @single_transformer_blocks.3.norm.linear.bias : tensor<9216xbf16> %422 = torch_c.from_builtin_tensor %single_transformer_blocks.3.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.3.proj_mlp.bias = util.global.load @single_transformer_blocks.3.proj_mlp.bias : 
tensor<12288xbf16> %423 = torch_c.from_builtin_tensor %single_transformer_blocks.3.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.3.proj_out.bias = util.global.load @single_transformer_blocks.3.proj_out.bias : tensor<3072xbf16> %424 = torch_c.from_builtin_tensor %single_transformer_blocks.3.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.3.attn.norm_q.weight = util.global.load @single_transformer_blocks.3.attn.norm_q.weight : tensor<128xbf16> %425 = torch_c.from_builtin_tensor %single_transformer_blocks.3.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.3.attn.norm_k.weight = util.global.load @single_transformer_blocks.3.attn.norm_k.weight : tensor<128xbf16> %426 = torch_c.from_builtin_tensor %single_transformer_blocks.3.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.3.attn.to_q.bias = util.global.load @single_transformer_blocks.3.attn.to_q.bias : tensor<3072xbf16> %427 = torch_c.from_builtin_tensor %single_transformer_blocks.3.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.3.attn.to_k.bias = util.global.load @single_transformer_blocks.3.attn.to_k.bias : tensor<3072xbf16> %428 = torch_c.from_builtin_tensor %single_transformer_blocks.3.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.3.attn.to_v.bias = util.global.load @single_transformer_blocks.3.attn.to_v.bias : tensor<3072xbf16> %429 = torch_c.from_builtin_tensor %single_transformer_blocks.3.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.4.norm.linear.weight = util.global.load @single_transformer_blocks.4.norm.linear.weight : tensor<9216x3072xbf16> %430 = torch_c.from_builtin_tensor %single_transformer_blocks.4.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> 
%single_transformer_blocks.4.norm.linear.bias = util.global.load @single_transformer_blocks.4.norm.linear.bias : tensor<9216xbf16> %431 = torch_c.from_builtin_tensor %single_transformer_blocks.4.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.4.proj_mlp.bias = util.global.load @single_transformer_blocks.4.proj_mlp.bias : tensor<12288xbf16> %432 = torch_c.from_builtin_tensor %single_transformer_blocks.4.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.4.proj_out.bias = util.global.load @single_transformer_blocks.4.proj_out.bias : tensor<3072xbf16> %433 = torch_c.from_builtin_tensor %single_transformer_blocks.4.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.4.attn.norm_q.weight = util.global.load @single_transformer_blocks.4.attn.norm_q.weight : tensor<128xbf16> %434 = torch_c.from_builtin_tensor %single_transformer_blocks.4.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.4.attn.norm_k.weight = util.global.load @single_transformer_blocks.4.attn.norm_k.weight : tensor<128xbf16> %435 = torch_c.from_builtin_tensor %single_transformer_blocks.4.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.4.attn.to_q.bias = util.global.load @single_transformer_blocks.4.attn.to_q.bias : tensor<3072xbf16> %436 = torch_c.from_builtin_tensor %single_transformer_blocks.4.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.4.attn.to_k.bias = util.global.load @single_transformer_blocks.4.attn.to_k.bias : tensor<3072xbf16> %437 = torch_c.from_builtin_tensor %single_transformer_blocks.4.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.4.attn.to_v.bias = util.global.load @single_transformer_blocks.4.attn.to_v.bias : tensor<3072xbf16> %438 = torch_c.from_builtin_tensor 
%single_transformer_blocks.4.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.5.norm.linear.weight = util.global.load @single_transformer_blocks.5.norm.linear.weight : tensor<9216x3072xbf16> %439 = torch_c.from_builtin_tensor %single_transformer_blocks.5.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.5.norm.linear.bias = util.global.load @single_transformer_blocks.5.norm.linear.bias : tensor<9216xbf16> %440 = torch_c.from_builtin_tensor %single_transformer_blocks.5.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.5.proj_mlp.bias = util.global.load @single_transformer_blocks.5.proj_mlp.bias : tensor<12288xbf16> %441 = torch_c.from_builtin_tensor %single_transformer_blocks.5.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.5.proj_out.bias = util.global.load @single_transformer_blocks.5.proj_out.bias : tensor<3072xbf16> %442 = torch_c.from_builtin_tensor %single_transformer_blocks.5.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.5.attn.norm_q.weight = util.global.load @single_transformer_blocks.5.attn.norm_q.weight : tensor<128xbf16> %443 = torch_c.from_builtin_tensor %single_transformer_blocks.5.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.5.attn.norm_k.weight = util.global.load @single_transformer_blocks.5.attn.norm_k.weight : tensor<128xbf16> %444 = torch_c.from_builtin_tensor %single_transformer_blocks.5.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.5.attn.to_q.bias = util.global.load @single_transformer_blocks.5.attn.to_q.bias : tensor<3072xbf16> %445 = torch_c.from_builtin_tensor %single_transformer_blocks.5.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.5.attn.to_k.bias = 
util.global.load @single_transformer_blocks.5.attn.to_k.bias : tensor<3072xbf16> %446 = torch_c.from_builtin_tensor %single_transformer_blocks.5.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.5.attn.to_v.bias = util.global.load @single_transformer_blocks.5.attn.to_v.bias : tensor<3072xbf16> %447 = torch_c.from_builtin_tensor %single_transformer_blocks.5.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.6.norm.linear.weight = util.global.load @single_transformer_blocks.6.norm.linear.weight : tensor<9216x3072xbf16> %448 = torch_c.from_builtin_tensor %single_transformer_blocks.6.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.6.norm.linear.bias = util.global.load @single_transformer_blocks.6.norm.linear.bias : tensor<9216xbf16> %449 = torch_c.from_builtin_tensor %single_transformer_blocks.6.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.6.proj_mlp.bias = util.global.load @single_transformer_blocks.6.proj_mlp.bias : tensor<12288xbf16> %450 = torch_c.from_builtin_tensor %single_transformer_blocks.6.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.6.proj_out.bias = util.global.load @single_transformer_blocks.6.proj_out.bias : tensor<3072xbf16> %451 = torch_c.from_builtin_tensor %single_transformer_blocks.6.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.6.attn.norm_q.weight = util.global.load @single_transformer_blocks.6.attn.norm_q.weight : tensor<128xbf16> %452 = torch_c.from_builtin_tensor %single_transformer_blocks.6.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.6.attn.norm_k.weight = util.global.load @single_transformer_blocks.6.attn.norm_k.weight : tensor<128xbf16> %453 = torch_c.from_builtin_tensor 
%single_transformer_blocks.6.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.6.attn.to_q.bias = util.global.load @single_transformer_blocks.6.attn.to_q.bias : tensor<3072xbf16> %454 = torch_c.from_builtin_tensor %single_transformer_blocks.6.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.6.attn.to_k.bias = util.global.load @single_transformer_blocks.6.attn.to_k.bias : tensor<3072xbf16> %455 = torch_c.from_builtin_tensor %single_transformer_blocks.6.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.6.attn.to_v.bias = util.global.load @single_transformer_blocks.6.attn.to_v.bias : tensor<3072xbf16> %456 = torch_c.from_builtin_tensor %single_transformer_blocks.6.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.7.norm.linear.weight = util.global.load @single_transformer_blocks.7.norm.linear.weight : tensor<9216x3072xbf16> %457 = torch_c.from_builtin_tensor %single_transformer_blocks.7.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.7.norm.linear.bias = util.global.load @single_transformer_blocks.7.norm.linear.bias : tensor<9216xbf16> %458 = torch_c.from_builtin_tensor %single_transformer_blocks.7.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.7.proj_mlp.bias = util.global.load @single_transformer_blocks.7.proj_mlp.bias : tensor<12288xbf16> %459 = torch_c.from_builtin_tensor %single_transformer_blocks.7.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.7.proj_out.bias = util.global.load @single_transformer_blocks.7.proj_out.bias : tensor<3072xbf16> %460 = torch_c.from_builtin_tensor %single_transformer_blocks.7.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.7.attn.norm_q.weight = util.global.load 
@single_transformer_blocks.7.attn.norm_q.weight : tensor<128xbf16> %461 = torch_c.from_builtin_tensor %single_transformer_blocks.7.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.7.attn.norm_k.weight = util.global.load @single_transformer_blocks.7.attn.norm_k.weight : tensor<128xbf16> %462 = torch_c.from_builtin_tensor %single_transformer_blocks.7.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.7.attn.to_q.bias = util.global.load @single_transformer_blocks.7.attn.to_q.bias : tensor<3072xbf16> %463 = torch_c.from_builtin_tensor %single_transformer_blocks.7.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.7.attn.to_k.bias = util.global.load @single_transformer_blocks.7.attn.to_k.bias : tensor<3072xbf16> %464 = torch_c.from_builtin_tensor %single_transformer_blocks.7.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.7.attn.to_v.bias = util.global.load @single_transformer_blocks.7.attn.to_v.bias : tensor<3072xbf16> %465 = torch_c.from_builtin_tensor %single_transformer_blocks.7.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.8.norm.linear.weight = util.global.load @single_transformer_blocks.8.norm.linear.weight : tensor<9216x3072xbf16> %466 = torch_c.from_builtin_tensor %single_transformer_blocks.8.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.8.norm.linear.bias = util.global.load @single_transformer_blocks.8.norm.linear.bias : tensor<9216xbf16> %467 = torch_c.from_builtin_tensor %single_transformer_blocks.8.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.8.proj_mlp.bias = util.global.load @single_transformer_blocks.8.proj_mlp.bias : tensor<12288xbf16> %468 = torch_c.from_builtin_tensor %single_transformer_blocks.8.proj_mlp.bias : 
tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.8.proj_out.bias = util.global.load @single_transformer_blocks.8.proj_out.bias : tensor<3072xbf16> %469 = torch_c.from_builtin_tensor %single_transformer_blocks.8.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.8.attn.norm_q.weight = util.global.load @single_transformer_blocks.8.attn.norm_q.weight : tensor<128xbf16> %470 = torch_c.from_builtin_tensor %single_transformer_blocks.8.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.8.attn.norm_k.weight = util.global.load @single_transformer_blocks.8.attn.norm_k.weight : tensor<128xbf16> %471 = torch_c.from_builtin_tensor %single_transformer_blocks.8.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.8.attn.to_q.bias = util.global.load @single_transformer_blocks.8.attn.to_q.bias : tensor<3072xbf16> %472 = torch_c.from_builtin_tensor %single_transformer_blocks.8.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.8.attn.to_k.bias = util.global.load @single_transformer_blocks.8.attn.to_k.bias : tensor<3072xbf16> %473 = torch_c.from_builtin_tensor %single_transformer_blocks.8.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.8.attn.to_v.bias = util.global.load @single_transformer_blocks.8.attn.to_v.bias : tensor<3072xbf16> %474 = torch_c.from_builtin_tensor %single_transformer_blocks.8.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.9.norm.linear.weight = util.global.load @single_transformer_blocks.9.norm.linear.weight : tensor<9216x3072xbf16> %475 = torch_c.from_builtin_tensor %single_transformer_blocks.9.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.9.norm.linear.bias = util.global.load 
@single_transformer_blocks.9.norm.linear.bias : tensor<9216xbf16> %476 = torch_c.from_builtin_tensor %single_transformer_blocks.9.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.9.proj_mlp.bias = util.global.load @single_transformer_blocks.9.proj_mlp.bias : tensor<12288xbf16> %477 = torch_c.from_builtin_tensor %single_transformer_blocks.9.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.9.proj_out.bias = util.global.load @single_transformer_blocks.9.proj_out.bias : tensor<3072xbf16> %478 = torch_c.from_builtin_tensor %single_transformer_blocks.9.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.9.attn.norm_q.weight = util.global.load @single_transformer_blocks.9.attn.norm_q.weight : tensor<128xbf16> %479 = torch_c.from_builtin_tensor %single_transformer_blocks.9.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.9.attn.norm_k.weight = util.global.load @single_transformer_blocks.9.attn.norm_k.weight : tensor<128xbf16> %480 = torch_c.from_builtin_tensor %single_transformer_blocks.9.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.9.attn.to_q.bias = util.global.load @single_transformer_blocks.9.attn.to_q.bias : tensor<3072xbf16> %481 = torch_c.from_builtin_tensor %single_transformer_blocks.9.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.9.attn.to_k.bias = util.global.load @single_transformer_blocks.9.attn.to_k.bias : tensor<3072xbf16> %482 = torch_c.from_builtin_tensor %single_transformer_blocks.9.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.9.attn.to_v.bias = util.global.load @single_transformer_blocks.9.attn.to_v.bias : tensor<3072xbf16> %483 = torch_c.from_builtin_tensor %single_transformer_blocks.9.attn.to_v.bias : tensor<3072xbf16> -> 
!torch.vtensor<[3072],bf16> %single_transformer_blocks.10.norm.linear.weight = util.global.load @single_transformer_blocks.10.norm.linear.weight : tensor<9216x3072xbf16> %484 = torch_c.from_builtin_tensor %single_transformer_blocks.10.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.10.norm.linear.bias = util.global.load @single_transformer_blocks.10.norm.linear.bias : tensor<9216xbf16> %485 = torch_c.from_builtin_tensor %single_transformer_blocks.10.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.10.proj_mlp.bias = util.global.load @single_transformer_blocks.10.proj_mlp.bias : tensor<12288xbf16> %486 = torch_c.from_builtin_tensor %single_transformer_blocks.10.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.10.proj_out.bias = util.global.load @single_transformer_blocks.10.proj_out.bias : tensor<3072xbf16> %487 = torch_c.from_builtin_tensor %single_transformer_blocks.10.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.10.attn.norm_q.weight = util.global.load @single_transformer_blocks.10.attn.norm_q.weight : tensor<128xbf16> %488 = torch_c.from_builtin_tensor %single_transformer_blocks.10.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.10.attn.norm_k.weight = util.global.load @single_transformer_blocks.10.attn.norm_k.weight : tensor<128xbf16> %489 = torch_c.from_builtin_tensor %single_transformer_blocks.10.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.10.attn.to_q.bias = util.global.load @single_transformer_blocks.10.attn.to_q.bias : tensor<3072xbf16> %490 = torch_c.from_builtin_tensor %single_transformer_blocks.10.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.10.attn.to_k.bias = util.global.load 
@single_transformer_blocks.10.attn.to_k.bias : tensor<3072xbf16> %491 = torch_c.from_builtin_tensor %single_transformer_blocks.10.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.10.attn.to_v.bias = util.global.load @single_transformer_blocks.10.attn.to_v.bias : tensor<3072xbf16> %492 = torch_c.from_builtin_tensor %single_transformer_blocks.10.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.11.norm.linear.weight = util.global.load @single_transformer_blocks.11.norm.linear.weight : tensor<9216x3072xbf16> %493 = torch_c.from_builtin_tensor %single_transformer_blocks.11.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.11.norm.linear.bias = util.global.load @single_transformer_blocks.11.norm.linear.bias : tensor<9216xbf16> %494 = torch_c.from_builtin_tensor %single_transformer_blocks.11.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.11.proj_mlp.bias = util.global.load @single_transformer_blocks.11.proj_mlp.bias : tensor<12288xbf16> %495 = torch_c.from_builtin_tensor %single_transformer_blocks.11.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.11.proj_out.bias = util.global.load @single_transformer_blocks.11.proj_out.bias : tensor<3072xbf16> %496 = torch_c.from_builtin_tensor %single_transformer_blocks.11.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.11.attn.norm_q.weight = util.global.load @single_transformer_blocks.11.attn.norm_q.weight : tensor<128xbf16> %497 = torch_c.from_builtin_tensor %single_transformer_blocks.11.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.11.attn.norm_k.weight = util.global.load @single_transformer_blocks.11.attn.norm_k.weight : tensor<128xbf16> %498 = torch_c.from_builtin_tensor 
%single_transformer_blocks.11.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.11.attn.to_q.bias = util.global.load @single_transformer_blocks.11.attn.to_q.bias : tensor<3072xbf16> %499 = torch_c.from_builtin_tensor %single_transformer_blocks.11.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.11.attn.to_k.bias = util.global.load @single_transformer_blocks.11.attn.to_k.bias : tensor<3072xbf16> %500 = torch_c.from_builtin_tensor %single_transformer_blocks.11.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.11.attn.to_v.bias = util.global.load @single_transformer_blocks.11.attn.to_v.bias : tensor<3072xbf16> %501 = torch_c.from_builtin_tensor %single_transformer_blocks.11.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.12.norm.linear.weight = util.global.load @single_transformer_blocks.12.norm.linear.weight : tensor<9216x3072xbf16> %502 = torch_c.from_builtin_tensor %single_transformer_blocks.12.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.12.norm.linear.bias = util.global.load @single_transformer_blocks.12.norm.linear.bias : tensor<9216xbf16> %503 = torch_c.from_builtin_tensor %single_transformer_blocks.12.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.12.proj_mlp.bias = util.global.load @single_transformer_blocks.12.proj_mlp.bias : tensor<12288xbf16> %504 = torch_c.from_builtin_tensor %single_transformer_blocks.12.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.12.proj_out.bias = util.global.load @single_transformer_blocks.12.proj_out.bias : tensor<3072xbf16> %505 = torch_c.from_builtin_tensor %single_transformer_blocks.12.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.12.attn.norm_q.weight = 
util.global.load @single_transformer_blocks.12.attn.norm_q.weight : tensor<128xbf16> %506 = torch_c.from_builtin_tensor %single_transformer_blocks.12.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.12.attn.norm_k.weight = util.global.load @single_transformer_blocks.12.attn.norm_k.weight : tensor<128xbf16> %507 = torch_c.from_builtin_tensor %single_transformer_blocks.12.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.12.attn.to_q.bias = util.global.load @single_transformer_blocks.12.attn.to_q.bias : tensor<3072xbf16> %508 = torch_c.from_builtin_tensor %single_transformer_blocks.12.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.12.attn.to_k.bias = util.global.load @single_transformer_blocks.12.attn.to_k.bias : tensor<3072xbf16> %509 = torch_c.from_builtin_tensor %single_transformer_blocks.12.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.12.attn.to_v.bias = util.global.load @single_transformer_blocks.12.attn.to_v.bias : tensor<3072xbf16> %510 = torch_c.from_builtin_tensor %single_transformer_blocks.12.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.13.norm.linear.weight = util.global.load @single_transformer_blocks.13.norm.linear.weight : tensor<9216x3072xbf16> %511 = torch_c.from_builtin_tensor %single_transformer_blocks.13.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.13.norm.linear.bias = util.global.load @single_transformer_blocks.13.norm.linear.bias : tensor<9216xbf16> %512 = torch_c.from_builtin_tensor %single_transformer_blocks.13.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.13.proj_mlp.bias = util.global.load @single_transformer_blocks.13.proj_mlp.bias : tensor<12288xbf16> %513 = torch_c.from_builtin_tensor 
%single_transformer_blocks.13.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.13.proj_out.bias = util.global.load @single_transformer_blocks.13.proj_out.bias : tensor<3072xbf16> %514 = torch_c.from_builtin_tensor %single_transformer_blocks.13.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.13.attn.norm_q.weight = util.global.load @single_transformer_blocks.13.attn.norm_q.weight : tensor<128xbf16> %515 = torch_c.from_builtin_tensor %single_transformer_blocks.13.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.13.attn.norm_k.weight = util.global.load @single_transformer_blocks.13.attn.norm_k.weight : tensor<128xbf16> %516 = torch_c.from_builtin_tensor %single_transformer_blocks.13.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.13.attn.to_q.bias = util.global.load @single_transformer_blocks.13.attn.to_q.bias : tensor<3072xbf16> %517 = torch_c.from_builtin_tensor %single_transformer_blocks.13.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.13.attn.to_k.bias = util.global.load @single_transformer_blocks.13.attn.to_k.bias : tensor<3072xbf16> %518 = torch_c.from_builtin_tensor %single_transformer_blocks.13.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.13.attn.to_v.bias = util.global.load @single_transformer_blocks.13.attn.to_v.bias : tensor<3072xbf16> %519 = torch_c.from_builtin_tensor %single_transformer_blocks.13.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.14.norm.linear.weight = util.global.load @single_transformer_blocks.14.norm.linear.weight : tensor<9216x3072xbf16> %520 = torch_c.from_builtin_tensor %single_transformer_blocks.14.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> 
%single_transformer_blocks.14.norm.linear.bias = util.global.load @single_transformer_blocks.14.norm.linear.bias : tensor<9216xbf16> %521 = torch_c.from_builtin_tensor %single_transformer_blocks.14.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.14.proj_mlp.bias = util.global.load @single_transformer_blocks.14.proj_mlp.bias : tensor<12288xbf16> %522 = torch_c.from_builtin_tensor %single_transformer_blocks.14.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.14.proj_out.bias = util.global.load @single_transformer_blocks.14.proj_out.bias : tensor<3072xbf16> %523 = torch_c.from_builtin_tensor %single_transformer_blocks.14.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.14.attn.norm_q.weight = util.global.load @single_transformer_blocks.14.attn.norm_q.weight : tensor<128xbf16> %524 = torch_c.from_builtin_tensor %single_transformer_blocks.14.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.14.attn.norm_k.weight = util.global.load @single_transformer_blocks.14.attn.norm_k.weight : tensor<128xbf16> %525 = torch_c.from_builtin_tensor %single_transformer_blocks.14.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.14.attn.to_q.bias = util.global.load @single_transformer_blocks.14.attn.to_q.bias : tensor<3072xbf16> %526 = torch_c.from_builtin_tensor %single_transformer_blocks.14.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.14.attn.to_k.bias = util.global.load @single_transformer_blocks.14.attn.to_k.bias : tensor<3072xbf16> %527 = torch_c.from_builtin_tensor %single_transformer_blocks.14.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.14.attn.to_v.bias = util.global.load @single_transformer_blocks.14.attn.to_v.bias : tensor<3072xbf16> %528 = 
torch_c.from_builtin_tensor %single_transformer_blocks.14.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.15.norm.linear.weight = util.global.load @single_transformer_blocks.15.norm.linear.weight : tensor<9216x3072xbf16> %529 = torch_c.from_builtin_tensor %single_transformer_blocks.15.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.15.norm.linear.bias = util.global.load @single_transformer_blocks.15.norm.linear.bias : tensor<9216xbf16> %530 = torch_c.from_builtin_tensor %single_transformer_blocks.15.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.15.proj_mlp.bias = util.global.load @single_transformer_blocks.15.proj_mlp.bias : tensor<12288xbf16> %531 = torch_c.from_builtin_tensor %single_transformer_blocks.15.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.15.proj_out.bias = util.global.load @single_transformer_blocks.15.proj_out.bias : tensor<3072xbf16> %532 = torch_c.from_builtin_tensor %single_transformer_blocks.15.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.15.attn.norm_q.weight = util.global.load @single_transformer_blocks.15.attn.norm_q.weight : tensor<128xbf16> %533 = torch_c.from_builtin_tensor %single_transformer_blocks.15.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.15.attn.norm_k.weight = util.global.load @single_transformer_blocks.15.attn.norm_k.weight : tensor<128xbf16> %534 = torch_c.from_builtin_tensor %single_transformer_blocks.15.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.15.attn.to_q.bias = util.global.load @single_transformer_blocks.15.attn.to_q.bias : tensor<3072xbf16> %535 = torch_c.from_builtin_tensor %single_transformer_blocks.15.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.15.attn.to_k.bias = util.global.load @single_transformer_blocks.15.attn.to_k.bias : tensor<3072xbf16> %536 = torch_c.from_builtin_tensor %single_transformer_blocks.15.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.15.attn.to_v.bias = util.global.load @single_transformer_blocks.15.attn.to_v.bias : tensor<3072xbf16> %537 = torch_c.from_builtin_tensor %single_transformer_blocks.15.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.16.norm.linear.weight = util.global.load @single_transformer_blocks.16.norm.linear.weight : tensor<9216x3072xbf16> %538 = torch_c.from_builtin_tensor %single_transformer_blocks.16.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.16.norm.linear.bias = util.global.load @single_transformer_blocks.16.norm.linear.bias : tensor<9216xbf16> %539 = torch_c.from_builtin_tensor %single_transformer_blocks.16.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.16.proj_mlp.bias = util.global.load @single_transformer_blocks.16.proj_mlp.bias : tensor<12288xbf16> %540 = torch_c.from_builtin_tensor %single_transformer_blocks.16.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.16.proj_out.bias = util.global.load @single_transformer_blocks.16.proj_out.bias : tensor<3072xbf16> %541 = torch_c.from_builtin_tensor %single_transformer_blocks.16.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.16.attn.norm_q.weight = util.global.load @single_transformer_blocks.16.attn.norm_q.weight : tensor<128xbf16> %542 = torch_c.from_builtin_tensor %single_transformer_blocks.16.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.16.attn.norm_k.weight = util.global.load @single_transformer_blocks.16.attn.norm_k.weight : tensor<128xbf16> %543 
= torch_c.from_builtin_tensor %single_transformer_blocks.16.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.16.attn.to_q.bias = util.global.load @single_transformer_blocks.16.attn.to_q.bias : tensor<3072xbf16> %544 = torch_c.from_builtin_tensor %single_transformer_blocks.16.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.16.attn.to_k.bias = util.global.load @single_transformer_blocks.16.attn.to_k.bias : tensor<3072xbf16> %545 = torch_c.from_builtin_tensor %single_transformer_blocks.16.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.16.attn.to_v.bias = util.global.load @single_transformer_blocks.16.attn.to_v.bias : tensor<3072xbf16> %546 = torch_c.from_builtin_tensor %single_transformer_blocks.16.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.17.norm.linear.weight = util.global.load @single_transformer_blocks.17.norm.linear.weight : tensor<9216x3072xbf16> %547 = torch_c.from_builtin_tensor %single_transformer_blocks.17.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.17.norm.linear.bias = util.global.load @single_transformer_blocks.17.norm.linear.bias : tensor<9216xbf16> %548 = torch_c.from_builtin_tensor %single_transformer_blocks.17.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.17.proj_mlp.bias = util.global.load @single_transformer_blocks.17.proj_mlp.bias : tensor<12288xbf16> %549 = torch_c.from_builtin_tensor %single_transformer_blocks.17.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.17.proj_out.bias = util.global.load @single_transformer_blocks.17.proj_out.bias : tensor<3072xbf16> %550 = torch_c.from_builtin_tensor %single_transformer_blocks.17.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.17.attn.norm_q.weight = util.global.load @single_transformer_blocks.17.attn.norm_q.weight : tensor<128xbf16> %551 = torch_c.from_builtin_tensor %single_transformer_blocks.17.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.17.attn.norm_k.weight = util.global.load @single_transformer_blocks.17.attn.norm_k.weight : tensor<128xbf16> %552 = torch_c.from_builtin_tensor %single_transformer_blocks.17.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.17.attn.to_q.bias = util.global.load @single_transformer_blocks.17.attn.to_q.bias : tensor<3072xbf16> %553 = torch_c.from_builtin_tensor %single_transformer_blocks.17.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.17.attn.to_k.bias = util.global.load @single_transformer_blocks.17.attn.to_k.bias : tensor<3072xbf16> %554 = torch_c.from_builtin_tensor %single_transformer_blocks.17.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.17.attn.to_v.bias = util.global.load @single_transformer_blocks.17.attn.to_v.bias : tensor<3072xbf16> %555 = torch_c.from_builtin_tensor %single_transformer_blocks.17.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.18.norm.linear.weight = util.global.load @single_transformer_blocks.18.norm.linear.weight : tensor<9216x3072xbf16> %556 = torch_c.from_builtin_tensor %single_transformer_blocks.18.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.18.norm.linear.bias = util.global.load @single_transformer_blocks.18.norm.linear.bias : tensor<9216xbf16> %557 = torch_c.from_builtin_tensor %single_transformer_blocks.18.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.18.proj_mlp.bias = util.global.load @single_transformer_blocks.18.proj_mlp.bias : tensor<12288xbf16> 
%558 = torch_c.from_builtin_tensor %single_transformer_blocks.18.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.18.proj_out.bias = util.global.load @single_transformer_blocks.18.proj_out.bias : tensor<3072xbf16> %559 = torch_c.from_builtin_tensor %single_transformer_blocks.18.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.18.attn.norm_q.weight = util.global.load @single_transformer_blocks.18.attn.norm_q.weight : tensor<128xbf16> %560 = torch_c.from_builtin_tensor %single_transformer_blocks.18.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.18.attn.norm_k.weight = util.global.load @single_transformer_blocks.18.attn.norm_k.weight : tensor<128xbf16> %561 = torch_c.from_builtin_tensor %single_transformer_blocks.18.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.18.attn.to_q.bias = util.global.load @single_transformer_blocks.18.attn.to_q.bias : tensor<3072xbf16> %562 = torch_c.from_builtin_tensor %single_transformer_blocks.18.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.18.attn.to_k.bias = util.global.load @single_transformer_blocks.18.attn.to_k.bias : tensor<3072xbf16> %563 = torch_c.from_builtin_tensor %single_transformer_blocks.18.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.18.attn.to_v.bias = util.global.load @single_transformer_blocks.18.attn.to_v.bias : tensor<3072xbf16> %564 = torch_c.from_builtin_tensor %single_transformer_blocks.18.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.19.norm.linear.weight = util.global.load @single_transformer_blocks.19.norm.linear.weight : tensor<9216x3072xbf16> %565 = torch_c.from_builtin_tensor %single_transformer_blocks.19.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> 
%single_transformer_blocks.19.norm.linear.bias = util.global.load @single_transformer_blocks.19.norm.linear.bias : tensor<9216xbf16> %566 = torch_c.from_builtin_tensor %single_transformer_blocks.19.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.19.proj_mlp.bias = util.global.load @single_transformer_blocks.19.proj_mlp.bias : tensor<12288xbf16> %567 = torch_c.from_builtin_tensor %single_transformer_blocks.19.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.19.proj_out.bias = util.global.load @single_transformer_blocks.19.proj_out.bias : tensor<3072xbf16> %568 = torch_c.from_builtin_tensor %single_transformer_blocks.19.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.19.attn.norm_q.weight = util.global.load @single_transformer_blocks.19.attn.norm_q.weight : tensor<128xbf16> %569 = torch_c.from_builtin_tensor %single_transformer_blocks.19.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.19.attn.norm_k.weight = util.global.load @single_transformer_blocks.19.attn.norm_k.weight : tensor<128xbf16> %570 = torch_c.from_builtin_tensor %single_transformer_blocks.19.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.19.attn.to_q.bias = util.global.load @single_transformer_blocks.19.attn.to_q.bias : tensor<3072xbf16> %571 = torch_c.from_builtin_tensor %single_transformer_blocks.19.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.19.attn.to_k.bias = util.global.load @single_transformer_blocks.19.attn.to_k.bias : tensor<3072xbf16> %572 = torch_c.from_builtin_tensor %single_transformer_blocks.19.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.19.attn.to_v.bias = util.global.load @single_transformer_blocks.19.attn.to_v.bias : tensor<3072xbf16> %573 = 
torch_c.from_builtin_tensor %single_transformer_blocks.19.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.20.norm.linear.weight = util.global.load @single_transformer_blocks.20.norm.linear.weight : tensor<9216x3072xbf16> %574 = torch_c.from_builtin_tensor %single_transformer_blocks.20.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.20.norm.linear.bias = util.global.load @single_transformer_blocks.20.norm.linear.bias : tensor<9216xbf16> %575 = torch_c.from_builtin_tensor %single_transformer_blocks.20.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.20.proj_mlp.bias = util.global.load @single_transformer_blocks.20.proj_mlp.bias : tensor<12288xbf16> %576 = torch_c.from_builtin_tensor %single_transformer_blocks.20.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.20.proj_out.bias = util.global.load @single_transformer_blocks.20.proj_out.bias : tensor<3072xbf16> %577 = torch_c.from_builtin_tensor %single_transformer_blocks.20.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.20.attn.norm_q.weight = util.global.load @single_transformer_blocks.20.attn.norm_q.weight : tensor<128xbf16> %578 = torch_c.from_builtin_tensor %single_transformer_blocks.20.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.20.attn.norm_k.weight = util.global.load @single_transformer_blocks.20.attn.norm_k.weight : tensor<128xbf16> %579 = torch_c.from_builtin_tensor %single_transformer_blocks.20.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.20.attn.to_q.bias = util.global.load @single_transformer_blocks.20.attn.to_q.bias : tensor<3072xbf16> %580 = torch_c.from_builtin_tensor %single_transformer_blocks.20.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.20.attn.to_k.bias = util.global.load @single_transformer_blocks.20.attn.to_k.bias : tensor<3072xbf16> %581 = torch_c.from_builtin_tensor %single_transformer_blocks.20.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.20.attn.to_v.bias = util.global.load @single_transformer_blocks.20.attn.to_v.bias : tensor<3072xbf16> %582 = torch_c.from_builtin_tensor %single_transformer_blocks.20.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.21.norm.linear.weight = util.global.load @single_transformer_blocks.21.norm.linear.weight : tensor<9216x3072xbf16> %583 = torch_c.from_builtin_tensor %single_transformer_blocks.21.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.21.norm.linear.bias = util.global.load @single_transformer_blocks.21.norm.linear.bias : tensor<9216xbf16> %584 = torch_c.from_builtin_tensor %single_transformer_blocks.21.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.21.proj_mlp.bias = util.global.load @single_transformer_blocks.21.proj_mlp.bias : tensor<12288xbf16> %585 = torch_c.from_builtin_tensor %single_transformer_blocks.21.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.21.proj_out.bias = util.global.load @single_transformer_blocks.21.proj_out.bias : tensor<3072xbf16> %586 = torch_c.from_builtin_tensor %single_transformer_blocks.21.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.21.attn.norm_q.weight = util.global.load @single_transformer_blocks.21.attn.norm_q.weight : tensor<128xbf16> %587 = torch_c.from_builtin_tensor %single_transformer_blocks.21.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.21.attn.norm_k.weight = util.global.load @single_transformer_blocks.21.attn.norm_k.weight : tensor<128xbf16> %588 
= torch_c.from_builtin_tensor %single_transformer_blocks.21.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.21.attn.to_q.bias = util.global.load @single_transformer_blocks.21.attn.to_q.bias : tensor<3072xbf16> %589 = torch_c.from_builtin_tensor %single_transformer_blocks.21.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.21.attn.to_k.bias = util.global.load @single_transformer_blocks.21.attn.to_k.bias : tensor<3072xbf16> %590 = torch_c.from_builtin_tensor %single_transformer_blocks.21.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.21.attn.to_v.bias = util.global.load @single_transformer_blocks.21.attn.to_v.bias : tensor<3072xbf16> %591 = torch_c.from_builtin_tensor %single_transformer_blocks.21.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.22.norm.linear.weight = util.global.load @single_transformer_blocks.22.norm.linear.weight : tensor<9216x3072xbf16> %592 = torch_c.from_builtin_tensor %single_transformer_blocks.22.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.22.norm.linear.bias = util.global.load @single_transformer_blocks.22.norm.linear.bias : tensor<9216xbf16> %593 = torch_c.from_builtin_tensor %single_transformer_blocks.22.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.22.proj_mlp.bias = util.global.load @single_transformer_blocks.22.proj_mlp.bias : tensor<12288xbf16> %594 = torch_c.from_builtin_tensor %single_transformer_blocks.22.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.22.proj_out.bias = util.global.load @single_transformer_blocks.22.proj_out.bias : tensor<3072xbf16> %595 = torch_c.from_builtin_tensor %single_transformer_blocks.22.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.22.attn.norm_q.weight = util.global.load @single_transformer_blocks.22.attn.norm_q.weight : tensor<128xbf16> %596 = torch_c.from_builtin_tensor %single_transformer_blocks.22.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.22.attn.norm_k.weight = util.global.load @single_transformer_blocks.22.attn.norm_k.weight : tensor<128xbf16> %597 = torch_c.from_builtin_tensor %single_transformer_blocks.22.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.22.attn.to_q.bias = util.global.load @single_transformer_blocks.22.attn.to_q.bias : tensor<3072xbf16> %598 = torch_c.from_builtin_tensor %single_transformer_blocks.22.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.22.attn.to_k.bias = util.global.load @single_transformer_blocks.22.attn.to_k.bias : tensor<3072xbf16> %599 = torch_c.from_builtin_tensor %single_transformer_blocks.22.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.22.attn.to_v.bias = util.global.load @single_transformer_blocks.22.attn.to_v.bias : tensor<3072xbf16> %600 = torch_c.from_builtin_tensor %single_transformer_blocks.22.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.23.norm.linear.weight = util.global.load @single_transformer_blocks.23.norm.linear.weight : tensor<9216x3072xbf16> %601 = torch_c.from_builtin_tensor %single_transformer_blocks.23.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.23.norm.linear.bias = util.global.load @single_transformer_blocks.23.norm.linear.bias : tensor<9216xbf16> %602 = torch_c.from_builtin_tensor %single_transformer_blocks.23.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.23.proj_mlp.bias = util.global.load @single_transformer_blocks.23.proj_mlp.bias : tensor<12288xbf16> 
%603 = torch_c.from_builtin_tensor %single_transformer_blocks.23.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.23.proj_out.bias = util.global.load @single_transformer_blocks.23.proj_out.bias : tensor<3072xbf16> %604 = torch_c.from_builtin_tensor %single_transformer_blocks.23.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.23.attn.norm_q.weight = util.global.load @single_transformer_blocks.23.attn.norm_q.weight : tensor<128xbf16> %605 = torch_c.from_builtin_tensor %single_transformer_blocks.23.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.23.attn.norm_k.weight = util.global.load @single_transformer_blocks.23.attn.norm_k.weight : tensor<128xbf16> %606 = torch_c.from_builtin_tensor %single_transformer_blocks.23.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.23.attn.to_q.bias = util.global.load @single_transformer_blocks.23.attn.to_q.bias : tensor<3072xbf16> %607 = torch_c.from_builtin_tensor %single_transformer_blocks.23.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.23.attn.to_k.bias = util.global.load @single_transformer_blocks.23.attn.to_k.bias : tensor<3072xbf16> %608 = torch_c.from_builtin_tensor %single_transformer_blocks.23.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.23.attn.to_v.bias = util.global.load @single_transformer_blocks.23.attn.to_v.bias : tensor<3072xbf16> %609 = torch_c.from_builtin_tensor %single_transformer_blocks.23.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.24.norm.linear.weight = util.global.load @single_transformer_blocks.24.norm.linear.weight : tensor<9216x3072xbf16> %610 = torch_c.from_builtin_tensor %single_transformer_blocks.24.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> 
%single_transformer_blocks.24.norm.linear.bias = util.global.load @single_transformer_blocks.24.norm.linear.bias : tensor<9216xbf16> %611 = torch_c.from_builtin_tensor %single_transformer_blocks.24.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.24.proj_mlp.bias = util.global.load @single_transformer_blocks.24.proj_mlp.bias : tensor<12288xbf16> %612 = torch_c.from_builtin_tensor %single_transformer_blocks.24.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.24.proj_out.bias = util.global.load @single_transformer_blocks.24.proj_out.bias : tensor<3072xbf16> %613 = torch_c.from_builtin_tensor %single_transformer_blocks.24.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.24.attn.norm_q.weight = util.global.load @single_transformer_blocks.24.attn.norm_q.weight : tensor<128xbf16> %614 = torch_c.from_builtin_tensor %single_transformer_blocks.24.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.24.attn.norm_k.weight = util.global.load @single_transformer_blocks.24.attn.norm_k.weight : tensor<128xbf16> %615 = torch_c.from_builtin_tensor %single_transformer_blocks.24.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.24.attn.to_q.bias = util.global.load @single_transformer_blocks.24.attn.to_q.bias : tensor<3072xbf16> %616 = torch_c.from_builtin_tensor %single_transformer_blocks.24.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.24.attn.to_k.bias = util.global.load @single_transformer_blocks.24.attn.to_k.bias : tensor<3072xbf16> %617 = torch_c.from_builtin_tensor %single_transformer_blocks.24.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.24.attn.to_v.bias = util.global.load @single_transformer_blocks.24.attn.to_v.bias : tensor<3072xbf16> %618 = 
torch_c.from_builtin_tensor %single_transformer_blocks.24.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.25.norm.linear.weight = util.global.load @single_transformer_blocks.25.norm.linear.weight : tensor<9216x3072xbf16> %619 = torch_c.from_builtin_tensor %single_transformer_blocks.25.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.25.norm.linear.bias = util.global.load @single_transformer_blocks.25.norm.linear.bias : tensor<9216xbf16> %620 = torch_c.from_builtin_tensor %single_transformer_blocks.25.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.25.proj_mlp.bias = util.global.load @single_transformer_blocks.25.proj_mlp.bias : tensor<12288xbf16> %621 = torch_c.from_builtin_tensor %single_transformer_blocks.25.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.25.proj_out.bias = util.global.load @single_transformer_blocks.25.proj_out.bias : tensor<3072xbf16> %622 = torch_c.from_builtin_tensor %single_transformer_blocks.25.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.25.attn.norm_q.weight = util.global.load @single_transformer_blocks.25.attn.norm_q.weight : tensor<128xbf16> %623 = torch_c.from_builtin_tensor %single_transformer_blocks.25.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.25.attn.norm_k.weight = util.global.load @single_transformer_blocks.25.attn.norm_k.weight : tensor<128xbf16> %624 = torch_c.from_builtin_tensor %single_transformer_blocks.25.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.25.attn.to_q.bias = util.global.load @single_transformer_blocks.25.attn.to_q.bias : tensor<3072xbf16> %625 = torch_c.from_builtin_tensor %single_transformer_blocks.25.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.25.attn.to_k.bias = util.global.load @single_transformer_blocks.25.attn.to_k.bias : tensor<3072xbf16> %626 = torch_c.from_builtin_tensor %single_transformer_blocks.25.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.25.attn.to_v.bias = util.global.load @single_transformer_blocks.25.attn.to_v.bias : tensor<3072xbf16> %627 = torch_c.from_builtin_tensor %single_transformer_blocks.25.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.26.norm.linear.weight = util.global.load @single_transformer_blocks.26.norm.linear.weight : tensor<9216x3072xbf16> %628 = torch_c.from_builtin_tensor %single_transformer_blocks.26.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.26.norm.linear.bias = util.global.load @single_transformer_blocks.26.norm.linear.bias : tensor<9216xbf16> %629 = torch_c.from_builtin_tensor %single_transformer_blocks.26.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.26.proj_mlp.bias = util.global.load @single_transformer_blocks.26.proj_mlp.bias : tensor<12288xbf16> %630 = torch_c.from_builtin_tensor %single_transformer_blocks.26.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.26.proj_out.bias = util.global.load @single_transformer_blocks.26.proj_out.bias : tensor<3072xbf16> %631 = torch_c.from_builtin_tensor %single_transformer_blocks.26.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.26.attn.norm_q.weight = util.global.load @single_transformer_blocks.26.attn.norm_q.weight : tensor<128xbf16> %632 = torch_c.from_builtin_tensor %single_transformer_blocks.26.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.26.attn.norm_k.weight = util.global.load @single_transformer_blocks.26.attn.norm_k.weight : tensor<128xbf16> %633 
= torch_c.from_builtin_tensor %single_transformer_blocks.26.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.26.attn.to_q.bias = util.global.load @single_transformer_blocks.26.attn.to_q.bias : tensor<3072xbf16> %634 = torch_c.from_builtin_tensor %single_transformer_blocks.26.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.26.attn.to_k.bias = util.global.load @single_transformer_blocks.26.attn.to_k.bias : tensor<3072xbf16> %635 = torch_c.from_builtin_tensor %single_transformer_blocks.26.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.26.attn.to_v.bias = util.global.load @single_transformer_blocks.26.attn.to_v.bias : tensor<3072xbf16> %636 = torch_c.from_builtin_tensor %single_transformer_blocks.26.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.27.norm.linear.weight = util.global.load @single_transformer_blocks.27.norm.linear.weight : tensor<9216x3072xbf16> %637 = torch_c.from_builtin_tensor %single_transformer_blocks.27.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.27.norm.linear.bias = util.global.load @single_transformer_blocks.27.norm.linear.bias : tensor<9216xbf16> %638 = torch_c.from_builtin_tensor %single_transformer_blocks.27.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.27.proj_mlp.bias = util.global.load @single_transformer_blocks.27.proj_mlp.bias : tensor<12288xbf16> %639 = torch_c.from_builtin_tensor %single_transformer_blocks.27.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.27.proj_out.bias = util.global.load @single_transformer_blocks.27.proj_out.bias : tensor<3072xbf16> %640 = torch_c.from_builtin_tensor %single_transformer_blocks.27.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.27.attn.norm_q.weight = util.global.load @single_transformer_blocks.27.attn.norm_q.weight : tensor<128xbf16> %641 = torch_c.from_builtin_tensor %single_transformer_blocks.27.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.27.attn.norm_k.weight = util.global.load @single_transformer_blocks.27.attn.norm_k.weight : tensor<128xbf16> %642 = torch_c.from_builtin_tensor %single_transformer_blocks.27.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.27.attn.to_q.bias = util.global.load @single_transformer_blocks.27.attn.to_q.bias : tensor<3072xbf16> %643 = torch_c.from_builtin_tensor %single_transformer_blocks.27.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.27.attn.to_k.bias = util.global.load @single_transformer_blocks.27.attn.to_k.bias : tensor<3072xbf16> %644 = torch_c.from_builtin_tensor %single_transformer_blocks.27.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.27.attn.to_v.bias = util.global.load @single_transformer_blocks.27.attn.to_v.bias : tensor<3072xbf16> %645 = torch_c.from_builtin_tensor %single_transformer_blocks.27.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.28.norm.linear.weight = util.global.load @single_transformer_blocks.28.norm.linear.weight : tensor<9216x3072xbf16> %646 = torch_c.from_builtin_tensor %single_transformer_blocks.28.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.28.norm.linear.bias = util.global.load @single_transformer_blocks.28.norm.linear.bias : tensor<9216xbf16> %647 = torch_c.from_builtin_tensor %single_transformer_blocks.28.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.28.proj_mlp.bias = util.global.load @single_transformer_blocks.28.proj_mlp.bias : tensor<12288xbf16> 
%648 = torch_c.from_builtin_tensor %single_transformer_blocks.28.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.28.proj_out.bias = util.global.load @single_transformer_blocks.28.proj_out.bias : tensor<3072xbf16> %649 = torch_c.from_builtin_tensor %single_transformer_blocks.28.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.28.attn.norm_q.weight = util.global.load @single_transformer_blocks.28.attn.norm_q.weight : tensor<128xbf16> %650 = torch_c.from_builtin_tensor %single_transformer_blocks.28.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.28.attn.norm_k.weight = util.global.load @single_transformer_blocks.28.attn.norm_k.weight : tensor<128xbf16> %651 = torch_c.from_builtin_tensor %single_transformer_blocks.28.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.28.attn.to_q.bias = util.global.load @single_transformer_blocks.28.attn.to_q.bias : tensor<3072xbf16> %652 = torch_c.from_builtin_tensor %single_transformer_blocks.28.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.28.attn.to_k.bias = util.global.load @single_transformer_blocks.28.attn.to_k.bias : tensor<3072xbf16> %653 = torch_c.from_builtin_tensor %single_transformer_blocks.28.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.28.attn.to_v.bias = util.global.load @single_transformer_blocks.28.attn.to_v.bias : tensor<3072xbf16> %654 = torch_c.from_builtin_tensor %single_transformer_blocks.28.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.29.norm.linear.weight = util.global.load @single_transformer_blocks.29.norm.linear.weight : tensor<9216x3072xbf16> %655 = torch_c.from_builtin_tensor %single_transformer_blocks.29.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> 
%single_transformer_blocks.29.norm.linear.bias = util.global.load @single_transformer_blocks.29.norm.linear.bias : tensor<9216xbf16> %656 = torch_c.from_builtin_tensor %single_transformer_blocks.29.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.29.proj_mlp.bias = util.global.load @single_transformer_blocks.29.proj_mlp.bias : tensor<12288xbf16> %657 = torch_c.from_builtin_tensor %single_transformer_blocks.29.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.29.proj_out.bias = util.global.load @single_transformer_blocks.29.proj_out.bias : tensor<3072xbf16> %658 = torch_c.from_builtin_tensor %single_transformer_blocks.29.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.29.attn.norm_q.weight = util.global.load @single_transformer_blocks.29.attn.norm_q.weight : tensor<128xbf16> %659 = torch_c.from_builtin_tensor %single_transformer_blocks.29.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.29.attn.norm_k.weight = util.global.load @single_transformer_blocks.29.attn.norm_k.weight : tensor<128xbf16> %660 = torch_c.from_builtin_tensor %single_transformer_blocks.29.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.29.attn.to_q.bias = util.global.load @single_transformer_blocks.29.attn.to_q.bias : tensor<3072xbf16> %661 = torch_c.from_builtin_tensor %single_transformer_blocks.29.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.29.attn.to_k.bias = util.global.load @single_transformer_blocks.29.attn.to_k.bias : tensor<3072xbf16> %662 = torch_c.from_builtin_tensor %single_transformer_blocks.29.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.29.attn.to_v.bias = util.global.load @single_transformer_blocks.29.attn.to_v.bias : tensor<3072xbf16> %663 = 
torch_c.from_builtin_tensor %single_transformer_blocks.29.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.30.norm.linear.weight = util.global.load @single_transformer_blocks.30.norm.linear.weight : tensor<9216x3072xbf16> %664 = torch_c.from_builtin_tensor %single_transformer_blocks.30.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.30.norm.linear.bias = util.global.load @single_transformer_blocks.30.norm.linear.bias : tensor<9216xbf16> %665 = torch_c.from_builtin_tensor %single_transformer_blocks.30.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.30.proj_mlp.bias = util.global.load @single_transformer_blocks.30.proj_mlp.bias : tensor<12288xbf16> %666 = torch_c.from_builtin_tensor %single_transformer_blocks.30.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.30.proj_out.bias = util.global.load @single_transformer_blocks.30.proj_out.bias : tensor<3072xbf16> %667 = torch_c.from_builtin_tensor %single_transformer_blocks.30.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.30.attn.norm_q.weight = util.global.load @single_transformer_blocks.30.attn.norm_q.weight : tensor<128xbf16> %668 = torch_c.from_builtin_tensor %single_transformer_blocks.30.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.30.attn.norm_k.weight = util.global.load @single_transformer_blocks.30.attn.norm_k.weight : tensor<128xbf16> %669 = torch_c.from_builtin_tensor %single_transformer_blocks.30.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.30.attn.to_q.bias = util.global.load @single_transformer_blocks.30.attn.to_q.bias : tensor<3072xbf16> %670 = torch_c.from_builtin_tensor %single_transformer_blocks.30.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.30.attn.to_k.bias = util.global.load @single_transformer_blocks.30.attn.to_k.bias : tensor<3072xbf16> %671 = torch_c.from_builtin_tensor %single_transformer_blocks.30.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.30.attn.to_v.bias = util.global.load @single_transformer_blocks.30.attn.to_v.bias : tensor<3072xbf16> %672 = torch_c.from_builtin_tensor %single_transformer_blocks.30.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.31.norm.linear.weight = util.global.load @single_transformer_blocks.31.norm.linear.weight : tensor<9216x3072xbf16> %673 = torch_c.from_builtin_tensor %single_transformer_blocks.31.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.31.norm.linear.bias = util.global.load @single_transformer_blocks.31.norm.linear.bias : tensor<9216xbf16> %674 = torch_c.from_builtin_tensor %single_transformer_blocks.31.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.31.proj_mlp.bias = util.global.load @single_transformer_blocks.31.proj_mlp.bias : tensor<12288xbf16> %675 = torch_c.from_builtin_tensor %single_transformer_blocks.31.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.31.proj_out.bias = util.global.load @single_transformer_blocks.31.proj_out.bias : tensor<3072xbf16> %676 = torch_c.from_builtin_tensor %single_transformer_blocks.31.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.31.attn.norm_q.weight = util.global.load @single_transformer_blocks.31.attn.norm_q.weight : tensor<128xbf16> %677 = torch_c.from_builtin_tensor %single_transformer_blocks.31.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.31.attn.norm_k.weight = util.global.load @single_transformer_blocks.31.attn.norm_k.weight : tensor<128xbf16> %678 
= torch_c.from_builtin_tensor %single_transformer_blocks.31.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.31.attn.to_q.bias = util.global.load @single_transformer_blocks.31.attn.to_q.bias : tensor<3072xbf16> %679 = torch_c.from_builtin_tensor %single_transformer_blocks.31.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.31.attn.to_k.bias = util.global.load @single_transformer_blocks.31.attn.to_k.bias : tensor<3072xbf16> %680 = torch_c.from_builtin_tensor %single_transformer_blocks.31.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.31.attn.to_v.bias = util.global.load @single_transformer_blocks.31.attn.to_v.bias : tensor<3072xbf16> %681 = torch_c.from_builtin_tensor %single_transformer_blocks.31.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.32.norm.linear.weight = util.global.load @single_transformer_blocks.32.norm.linear.weight : tensor<9216x3072xbf16> %682 = torch_c.from_builtin_tensor %single_transformer_blocks.32.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.32.norm.linear.bias = util.global.load @single_transformer_blocks.32.norm.linear.bias : tensor<9216xbf16> %683 = torch_c.from_builtin_tensor %single_transformer_blocks.32.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.32.proj_mlp.bias = util.global.load @single_transformer_blocks.32.proj_mlp.bias : tensor<12288xbf16> %684 = torch_c.from_builtin_tensor %single_transformer_blocks.32.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.32.proj_out.bias = util.global.load @single_transformer_blocks.32.proj_out.bias : tensor<3072xbf16> %685 = torch_c.from_builtin_tensor %single_transformer_blocks.32.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.32.attn.norm_q.weight = util.global.load @single_transformer_blocks.32.attn.norm_q.weight : tensor<128xbf16> %686 = torch_c.from_builtin_tensor %single_transformer_blocks.32.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.32.attn.norm_k.weight = util.global.load @single_transformer_blocks.32.attn.norm_k.weight : tensor<128xbf16> %687 = torch_c.from_builtin_tensor %single_transformer_blocks.32.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.32.attn.to_q.bias = util.global.load @single_transformer_blocks.32.attn.to_q.bias : tensor<3072xbf16> %688 = torch_c.from_builtin_tensor %single_transformer_blocks.32.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.32.attn.to_k.bias = util.global.load @single_transformer_blocks.32.attn.to_k.bias : tensor<3072xbf16> %689 = torch_c.from_builtin_tensor %single_transformer_blocks.32.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.32.attn.to_v.bias = util.global.load @single_transformer_blocks.32.attn.to_v.bias : tensor<3072xbf16> %690 = torch_c.from_builtin_tensor %single_transformer_blocks.32.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.33.norm.linear.weight = util.global.load @single_transformer_blocks.33.norm.linear.weight : tensor<9216x3072xbf16> %691 = torch_c.from_builtin_tensor %single_transformer_blocks.33.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.33.norm.linear.bias = util.global.load @single_transformer_blocks.33.norm.linear.bias : tensor<9216xbf16> %692 = torch_c.from_builtin_tensor %single_transformer_blocks.33.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.33.proj_mlp.bias = util.global.load @single_transformer_blocks.33.proj_mlp.bias : tensor<12288xbf16> 
%693 = torch_c.from_builtin_tensor %single_transformer_blocks.33.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.33.proj_out.bias = util.global.load @single_transformer_blocks.33.proj_out.bias : tensor<3072xbf16> %694 = torch_c.from_builtin_tensor %single_transformer_blocks.33.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.33.attn.norm_q.weight = util.global.load @single_transformer_blocks.33.attn.norm_q.weight : tensor<128xbf16> %695 = torch_c.from_builtin_tensor %single_transformer_blocks.33.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.33.attn.norm_k.weight = util.global.load @single_transformer_blocks.33.attn.norm_k.weight : tensor<128xbf16> %696 = torch_c.from_builtin_tensor %single_transformer_blocks.33.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.33.attn.to_q.bias = util.global.load @single_transformer_blocks.33.attn.to_q.bias : tensor<3072xbf16> %697 = torch_c.from_builtin_tensor %single_transformer_blocks.33.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.33.attn.to_k.bias = util.global.load @single_transformer_blocks.33.attn.to_k.bias : tensor<3072xbf16> %698 = torch_c.from_builtin_tensor %single_transformer_blocks.33.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.33.attn.to_v.bias = util.global.load @single_transformer_blocks.33.attn.to_v.bias : tensor<3072xbf16> %699 = torch_c.from_builtin_tensor %single_transformer_blocks.33.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.34.norm.linear.weight = util.global.load @single_transformer_blocks.34.norm.linear.weight : tensor<9216x3072xbf16> %700 = torch_c.from_builtin_tensor %single_transformer_blocks.34.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> 
%single_transformer_blocks.34.norm.linear.bias = util.global.load @single_transformer_blocks.34.norm.linear.bias : tensor<9216xbf16> %701 = torch_c.from_builtin_tensor %single_transformer_blocks.34.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.34.proj_mlp.bias = util.global.load @single_transformer_blocks.34.proj_mlp.bias : tensor<12288xbf16> %702 = torch_c.from_builtin_tensor %single_transformer_blocks.34.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.34.proj_out.bias = util.global.load @single_transformer_blocks.34.proj_out.bias : tensor<3072xbf16> %703 = torch_c.from_builtin_tensor %single_transformer_blocks.34.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.34.attn.norm_q.weight = util.global.load @single_transformer_blocks.34.attn.norm_q.weight : tensor<128xbf16> %704 = torch_c.from_builtin_tensor %single_transformer_blocks.34.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.34.attn.norm_k.weight = util.global.load @single_transformer_blocks.34.attn.norm_k.weight : tensor<128xbf16> %705 = torch_c.from_builtin_tensor %single_transformer_blocks.34.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.34.attn.to_q.bias = util.global.load @single_transformer_blocks.34.attn.to_q.bias : tensor<3072xbf16> %706 = torch_c.from_builtin_tensor %single_transformer_blocks.34.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.34.attn.to_k.bias = util.global.load @single_transformer_blocks.34.attn.to_k.bias : tensor<3072xbf16> %707 = torch_c.from_builtin_tensor %single_transformer_blocks.34.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.34.attn.to_v.bias = util.global.load @single_transformer_blocks.34.attn.to_v.bias : tensor<3072xbf16> %708 = 
torch_c.from_builtin_tensor %single_transformer_blocks.34.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.35.norm.linear.weight = util.global.load @single_transformer_blocks.35.norm.linear.weight : tensor<9216x3072xbf16> %709 = torch_c.from_builtin_tensor %single_transformer_blocks.35.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.35.norm.linear.bias = util.global.load @single_transformer_blocks.35.norm.linear.bias : tensor<9216xbf16> %710 = torch_c.from_builtin_tensor %single_transformer_blocks.35.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.35.proj_mlp.bias = util.global.load @single_transformer_blocks.35.proj_mlp.bias : tensor<12288xbf16> %711 = torch_c.from_builtin_tensor %single_transformer_blocks.35.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.35.proj_out.bias = util.global.load @single_transformer_blocks.35.proj_out.bias : tensor<3072xbf16> %712 = torch_c.from_builtin_tensor %single_transformer_blocks.35.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.35.attn.norm_q.weight = util.global.load @single_transformer_blocks.35.attn.norm_q.weight : tensor<128xbf16> %713 = torch_c.from_builtin_tensor %single_transformer_blocks.35.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.35.attn.norm_k.weight = util.global.load @single_transformer_blocks.35.attn.norm_k.weight : tensor<128xbf16> %714 = torch_c.from_builtin_tensor %single_transformer_blocks.35.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.35.attn.to_q.bias = util.global.load @single_transformer_blocks.35.attn.to_q.bias : tensor<3072xbf16> %715 = torch_c.from_builtin_tensor %single_transformer_blocks.35.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.35.attn.to_k.bias = util.global.load @single_transformer_blocks.35.attn.to_k.bias : tensor<3072xbf16> %716 = torch_c.from_builtin_tensor %single_transformer_blocks.35.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.35.attn.to_v.bias = util.global.load @single_transformer_blocks.35.attn.to_v.bias : tensor<3072xbf16> %717 = torch_c.from_builtin_tensor %single_transformer_blocks.35.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.36.norm.linear.weight = util.global.load @single_transformer_blocks.36.norm.linear.weight : tensor<9216x3072xbf16> %718 = torch_c.from_builtin_tensor %single_transformer_blocks.36.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.36.norm.linear.bias = util.global.load @single_transformer_blocks.36.norm.linear.bias : tensor<9216xbf16> %719 = torch_c.from_builtin_tensor %single_transformer_blocks.36.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.36.proj_mlp.bias = util.global.load @single_transformer_blocks.36.proj_mlp.bias : tensor<12288xbf16> %720 = torch_c.from_builtin_tensor %single_transformer_blocks.36.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.36.proj_out.bias = util.global.load @single_transformer_blocks.36.proj_out.bias : tensor<3072xbf16> %721 = torch_c.from_builtin_tensor %single_transformer_blocks.36.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.36.attn.norm_q.weight = util.global.load @single_transformer_blocks.36.attn.norm_q.weight : tensor<128xbf16> %722 = torch_c.from_builtin_tensor %single_transformer_blocks.36.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.36.attn.norm_k.weight = util.global.load @single_transformer_blocks.36.attn.norm_k.weight : tensor<128xbf16> %723 
= torch_c.from_builtin_tensor %single_transformer_blocks.36.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.36.attn.to_q.bias = util.global.load @single_transformer_blocks.36.attn.to_q.bias : tensor<3072xbf16> %724 = torch_c.from_builtin_tensor %single_transformer_blocks.36.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.36.attn.to_k.bias = util.global.load @single_transformer_blocks.36.attn.to_k.bias : tensor<3072xbf16> %725 = torch_c.from_builtin_tensor %single_transformer_blocks.36.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.36.attn.to_v.bias = util.global.load @single_transformer_blocks.36.attn.to_v.bias : tensor<3072xbf16> %726 = torch_c.from_builtin_tensor %single_transformer_blocks.36.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.37.norm.linear.weight = util.global.load @single_transformer_blocks.37.norm.linear.weight : tensor<9216x3072xbf16> %727 = torch_c.from_builtin_tensor %single_transformer_blocks.37.norm.linear.weight : tensor<9216x3072xbf16> -> !torch.vtensor<[9216,3072],bf16> %single_transformer_blocks.37.norm.linear.bias = util.global.load @single_transformer_blocks.37.norm.linear.bias : tensor<9216xbf16> %728 = torch_c.from_builtin_tensor %single_transformer_blocks.37.norm.linear.bias : tensor<9216xbf16> -> !torch.vtensor<[9216],bf16> %single_transformer_blocks.37.proj_mlp.bias = util.global.load @single_transformer_blocks.37.proj_mlp.bias : tensor<12288xbf16> %729 = torch_c.from_builtin_tensor %single_transformer_blocks.37.proj_mlp.bias : tensor<12288xbf16> -> !torch.vtensor<[12288],bf16> %single_transformer_blocks.37.proj_out.bias = util.global.load @single_transformer_blocks.37.proj_out.bias : tensor<3072xbf16> %730 = torch_c.from_builtin_tensor %single_transformer_blocks.37.proj_out.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%single_transformer_blocks.37.attn.norm_q.weight = util.global.load @single_transformer_blocks.37.attn.norm_q.weight : tensor<128xbf16> %731 = torch_c.from_builtin_tensor %single_transformer_blocks.37.attn.norm_q.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.37.attn.norm_k.weight = util.global.load @single_transformer_blocks.37.attn.norm_k.weight : tensor<128xbf16> %732 = torch_c.from_builtin_tensor %single_transformer_blocks.37.attn.norm_k.weight : tensor<128xbf16> -> !torch.vtensor<[128],bf16> %single_transformer_blocks.37.attn.to_q.bias = util.global.load @single_transformer_blocks.37.attn.to_q.bias : tensor<3072xbf16> %733 = torch_c.from_builtin_tensor %single_transformer_blocks.37.attn.to_q.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.37.attn.to_k.bias = util.global.load @single_transformer_blocks.37.attn.to_k.bias : tensor<3072xbf16> %734 = torch_c.from_builtin_tensor %single_transformer_blocks.37.attn.to_k.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %single_transformer_blocks.37.attn.to_v.bias = util.global.load @single_transformer_blocks.37.attn.to_v.bias : tensor<3072xbf16> %735 = torch_c.from_builtin_tensor %single_transformer_blocks.37.attn.to_v.bias : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %norm_out.linear.weight = util.global.load @norm_out.linear.weight : tensor<6144x3072xbf16> %736 = torch_c.from_builtin_tensor %norm_out.linear.weight : tensor<6144x3072xbf16> -> !torch.vtensor<[6144,3072],bf16> %norm_out.linear.bias = util.global.load @norm_out.linear.bias : tensor<6144xbf16> %737 = torch_c.from_builtin_tensor %norm_out.linear.bias : tensor<6144xbf16> -> !torch.vtensor<[6144],bf16> %738 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_proj_out.bias> : tensor<64xbf16>} : () -> !torch.vtensor<[64],bf16> %onnx__MatMul_19741 = util.global.load @onnx__MatMul_19741 : tensor<64x3072xbf16> %739 = torch_c.from_builtin_tensor 
%onnx__MatMul_19741 : tensor<64x3072xbf16> -> !torch.vtensor<[64,3072],bf16> %onnx__MatMul_19758 = util.global.load @onnx__MatMul_19758 : tensor<4096x3072xbf16> %740 = torch_c.from_builtin_tensor %onnx__MatMul_19758 : tensor<4096x3072xbf16> -> !torch.vtensor<[4096,3072],bf16> %onnx__MatMul_19762 = util.global.load @onnx__MatMul_19762 : tensor<3072x3072xbf16> %741 = torch_c.from_builtin_tensor %onnx__MatMul_19762 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19763 = util.global.load @onnx__MatMul_19763 : tensor<3072x3072xbf16> %742 = torch_c.from_builtin_tensor %onnx__MatMul_19763 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19764 = util.global.load @onnx__MatMul_19764 : tensor<3072x3072xbf16> %743 = torch_c.from_builtin_tensor %onnx__MatMul_19764 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19795 = util.global.load @onnx__MatMul_19795 : tensor<3072x3072xbf16> %744 = torch_c.from_builtin_tensor %onnx__MatMul_19795 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19796 = util.global.load @onnx__MatMul_19796 : tensor<3072x3072xbf16> %745 = torch_c.from_builtin_tensor %onnx__MatMul_19796 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19797 = util.global.load @onnx__MatMul_19797 : tensor<3072x3072xbf16> %746 = torch_c.from_builtin_tensor %onnx__MatMul_19797 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19822 = util.global.load @onnx__MatMul_19822 : tensor<3072x3072xbf16> %747 = torch_c.from_builtin_tensor %onnx__MatMul_19822 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19823 = util.global.load @onnx__MatMul_19823 : tensor<3072x3072xbf16> %748 = torch_c.from_builtin_tensor %onnx__MatMul_19823 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19824 = util.global.load @onnx__MatMul_19824 : tensor<3072x12288xbf16> %749 = torch_c.from_builtin_tensor 
%onnx__MatMul_19824 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19825 = util.global.load @onnx__MatMul_19825 : tensor<12288x3072xbf16> %750 = torch_c.from_builtin_tensor %onnx__MatMul_19825 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19826 = util.global.load @onnx__MatMul_19826 : tensor<3072x12288xbf16> %751 = torch_c.from_builtin_tensor %onnx__MatMul_19826 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19827 = util.global.load @onnx__MatMul_19827 : tensor<12288x3072xbf16> %752 = torch_c.from_builtin_tensor %onnx__MatMul_19827 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19828 = util.global.load @onnx__MatMul_19828 : tensor<3072x3072xbf16> %753 = torch_c.from_builtin_tensor %onnx__MatMul_19828 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19829 = util.global.load @onnx__MatMul_19829 : tensor<3072x3072xbf16> %754 = torch_c.from_builtin_tensor %onnx__MatMul_19829 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19830 = util.global.load @onnx__MatMul_19830 : tensor<3072x3072xbf16> %755 = torch_c.from_builtin_tensor %onnx__MatMul_19830 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19843 = util.global.load @onnx__MatMul_19843 : tensor<3072x3072xbf16> %756 = torch_c.from_builtin_tensor %onnx__MatMul_19843 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19844 = util.global.load @onnx__MatMul_19844 : tensor<3072x3072xbf16> %757 = torch_c.from_builtin_tensor %onnx__MatMul_19844 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19845 = util.global.load @onnx__MatMul_19845 : tensor<3072x3072xbf16> %758 = torch_c.from_builtin_tensor %onnx__MatMul_19845 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19857 = util.global.load @onnx__MatMul_19857 : tensor<3072x3072xbf16> %759 = 
torch_c.from_builtin_tensor %onnx__MatMul_19857 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19858 = util.global.load @onnx__MatMul_19858 : tensor<3072x3072xbf16> %760 = torch_c.from_builtin_tensor %onnx__MatMul_19858 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19859 = util.global.load @onnx__MatMul_19859 : tensor<3072x12288xbf16> %761 = torch_c.from_builtin_tensor %onnx__MatMul_19859 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19860 = util.global.load @onnx__MatMul_19860 : tensor<12288x3072xbf16> %762 = torch_c.from_builtin_tensor %onnx__MatMul_19860 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19861 = util.global.load @onnx__MatMul_19861 : tensor<3072x12288xbf16> %763 = torch_c.from_builtin_tensor %onnx__MatMul_19861 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19862 = util.global.load @onnx__MatMul_19862 : tensor<12288x3072xbf16> %764 = torch_c.from_builtin_tensor %onnx__MatMul_19862 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19863 = util.global.load @onnx__MatMul_19863 : tensor<3072x3072xbf16> %765 = torch_c.from_builtin_tensor %onnx__MatMul_19863 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19864 = util.global.load @onnx__MatMul_19864 : tensor<3072x3072xbf16> %766 = torch_c.from_builtin_tensor %onnx__MatMul_19864 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19865 = util.global.load @onnx__MatMul_19865 : tensor<3072x3072xbf16> %767 = torch_c.from_builtin_tensor %onnx__MatMul_19865 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19878 = util.global.load @onnx__MatMul_19878 : tensor<3072x3072xbf16> %768 = torch_c.from_builtin_tensor %onnx__MatMul_19878 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19879 = util.global.load @onnx__MatMul_19879 : 
tensor<3072x3072xbf16> %769 = torch_c.from_builtin_tensor %onnx__MatMul_19879 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19880 = util.global.load @onnx__MatMul_19880 : tensor<3072x3072xbf16> %770 = torch_c.from_builtin_tensor %onnx__MatMul_19880 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19892 = util.global.load @onnx__MatMul_19892 : tensor<3072x3072xbf16> %771 = torch_c.from_builtin_tensor %onnx__MatMul_19892 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19893 = util.global.load @onnx__MatMul_19893 : tensor<3072x3072xbf16> %772 = torch_c.from_builtin_tensor %onnx__MatMul_19893 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19894 = util.global.load @onnx__MatMul_19894 : tensor<3072x12288xbf16> %773 = torch_c.from_builtin_tensor %onnx__MatMul_19894 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19895 = util.global.load @onnx__MatMul_19895 : tensor<12288x3072xbf16> %774 = torch_c.from_builtin_tensor %onnx__MatMul_19895 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19896 = util.global.load @onnx__MatMul_19896 : tensor<3072x12288xbf16> %775 = torch_c.from_builtin_tensor %onnx__MatMul_19896 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19897 = util.global.load @onnx__MatMul_19897 : tensor<12288x3072xbf16> %776 = torch_c.from_builtin_tensor %onnx__MatMul_19897 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19898 = util.global.load @onnx__MatMul_19898 : tensor<3072x3072xbf16> %777 = torch_c.from_builtin_tensor %onnx__MatMul_19898 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19899 = util.global.load @onnx__MatMul_19899 : tensor<3072x3072xbf16> %778 = torch_c.from_builtin_tensor %onnx__MatMul_19899 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19900 = util.global.load 
@onnx__MatMul_19900 : tensor<3072x3072xbf16> %779 = torch_c.from_builtin_tensor %onnx__MatMul_19900 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19913 = util.global.load @onnx__MatMul_19913 : tensor<3072x3072xbf16> %780 = torch_c.from_builtin_tensor %onnx__MatMul_19913 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19914 = util.global.load @onnx__MatMul_19914 : tensor<3072x3072xbf16> %781 = torch_c.from_builtin_tensor %onnx__MatMul_19914 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19915 = util.global.load @onnx__MatMul_19915 : tensor<3072x3072xbf16> %782 = torch_c.from_builtin_tensor %onnx__MatMul_19915 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19927 = util.global.load @onnx__MatMul_19927 : tensor<3072x3072xbf16> %783 = torch_c.from_builtin_tensor %onnx__MatMul_19927 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19928 = util.global.load @onnx__MatMul_19928 : tensor<3072x3072xbf16> %784 = torch_c.from_builtin_tensor %onnx__MatMul_19928 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19929 = util.global.load @onnx__MatMul_19929 : tensor<3072x12288xbf16> %785 = torch_c.from_builtin_tensor %onnx__MatMul_19929 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19930 = util.global.load @onnx__MatMul_19930 : tensor<12288x3072xbf16> %786 = torch_c.from_builtin_tensor %onnx__MatMul_19930 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19931 = util.global.load @onnx__MatMul_19931 : tensor<3072x12288xbf16> %787 = torch_c.from_builtin_tensor %onnx__MatMul_19931 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19932 = util.global.load @onnx__MatMul_19932 : tensor<12288x3072xbf16> %788 = torch_c.from_builtin_tensor %onnx__MatMul_19932 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19933 = 
util.global.load @onnx__MatMul_19933 : tensor<3072x3072xbf16> %789 = torch_c.from_builtin_tensor %onnx__MatMul_19933 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19934 = util.global.load @onnx__MatMul_19934 : tensor<3072x3072xbf16> %790 = torch_c.from_builtin_tensor %onnx__MatMul_19934 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19935 = util.global.load @onnx__MatMul_19935 : tensor<3072x3072xbf16> %791 = torch_c.from_builtin_tensor %onnx__MatMul_19935 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19948 = util.global.load @onnx__MatMul_19948 : tensor<3072x3072xbf16> %792 = torch_c.from_builtin_tensor %onnx__MatMul_19948 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19949 = util.global.load @onnx__MatMul_19949 : tensor<3072x3072xbf16> %793 = torch_c.from_builtin_tensor %onnx__MatMul_19949 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19950 = util.global.load @onnx__MatMul_19950 : tensor<3072x3072xbf16> %794 = torch_c.from_builtin_tensor %onnx__MatMul_19950 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19962 = util.global.load @onnx__MatMul_19962 : tensor<3072x3072xbf16> %795 = torch_c.from_builtin_tensor %onnx__MatMul_19962 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19963 = util.global.load @onnx__MatMul_19963 : tensor<3072x3072xbf16> %796 = torch_c.from_builtin_tensor %onnx__MatMul_19963 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19964 = util.global.load @onnx__MatMul_19964 : tensor<3072x12288xbf16> %797 = torch_c.from_builtin_tensor %onnx__MatMul_19964 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19965 = util.global.load @onnx__MatMul_19965 : tensor<12288x3072xbf16> %798 = torch_c.from_builtin_tensor %onnx__MatMul_19965 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> 
%onnx__MatMul_19966 = util.global.load @onnx__MatMul_19966 : tensor<3072x12288xbf16> %799 = torch_c.from_builtin_tensor %onnx__MatMul_19966 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_19967 = util.global.load @onnx__MatMul_19967 : tensor<12288x3072xbf16> %800 = torch_c.from_builtin_tensor %onnx__MatMul_19967 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_19968 = util.global.load @onnx__MatMul_19968 : tensor<3072x3072xbf16> %801 = torch_c.from_builtin_tensor %onnx__MatMul_19968 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19969 = util.global.load @onnx__MatMul_19969 : tensor<3072x3072xbf16> %802 = torch_c.from_builtin_tensor %onnx__MatMul_19969 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19970 = util.global.load @onnx__MatMul_19970 : tensor<3072x3072xbf16> %803 = torch_c.from_builtin_tensor %onnx__MatMul_19970 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19983 = util.global.load @onnx__MatMul_19983 : tensor<3072x3072xbf16> %804 = torch_c.from_builtin_tensor %onnx__MatMul_19983 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19984 = util.global.load @onnx__MatMul_19984 : tensor<3072x3072xbf16> %805 = torch_c.from_builtin_tensor %onnx__MatMul_19984 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19985 = util.global.load @onnx__MatMul_19985 : tensor<3072x3072xbf16> %806 = torch_c.from_builtin_tensor %onnx__MatMul_19985 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19997 = util.global.load @onnx__MatMul_19997 : tensor<3072x3072xbf16> %807 = torch_c.from_builtin_tensor %onnx__MatMul_19997 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19998 = util.global.load @onnx__MatMul_19998 : tensor<3072x3072xbf16> %808 = torch_c.from_builtin_tensor %onnx__MatMul_19998 : tensor<3072x3072xbf16> -> 
!torch.vtensor<[3072,3072],bf16> %onnx__MatMul_19999 = util.global.load @onnx__MatMul_19999 : tensor<3072x12288xbf16> %809 = torch_c.from_builtin_tensor %onnx__MatMul_19999 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20000 = util.global.load @onnx__MatMul_20000 : tensor<12288x3072xbf16> %810 = torch_c.from_builtin_tensor %onnx__MatMul_20000 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20001 = util.global.load @onnx__MatMul_20001 : tensor<3072x12288xbf16> %811 = torch_c.from_builtin_tensor %onnx__MatMul_20001 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20002 = util.global.load @onnx__MatMul_20002 : tensor<12288x3072xbf16> %812 = torch_c.from_builtin_tensor %onnx__MatMul_20002 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20003 = util.global.load @onnx__MatMul_20003 : tensor<3072x3072xbf16> %813 = torch_c.from_builtin_tensor %onnx__MatMul_20003 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20004 = util.global.load @onnx__MatMul_20004 : tensor<3072x3072xbf16> %814 = torch_c.from_builtin_tensor %onnx__MatMul_20004 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20005 = util.global.load @onnx__MatMul_20005 : tensor<3072x3072xbf16> %815 = torch_c.from_builtin_tensor %onnx__MatMul_20005 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20018 = util.global.load @onnx__MatMul_20018 : tensor<3072x3072xbf16> %816 = torch_c.from_builtin_tensor %onnx__MatMul_20018 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20019 = util.global.load @onnx__MatMul_20019 : tensor<3072x3072xbf16> %817 = torch_c.from_builtin_tensor %onnx__MatMul_20019 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20020 = util.global.load @onnx__MatMul_20020 : tensor<3072x3072xbf16> %818 = torch_c.from_builtin_tensor %onnx__MatMul_20020 : 
tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20032 = util.global.load @onnx__MatMul_20032 : tensor<3072x3072xbf16> %819 = torch_c.from_builtin_tensor %onnx__MatMul_20032 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20033 = util.global.load @onnx__MatMul_20033 : tensor<3072x3072xbf16> %820 = torch_c.from_builtin_tensor %onnx__MatMul_20033 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20034 = util.global.load @onnx__MatMul_20034 : tensor<3072x12288xbf16> %821 = torch_c.from_builtin_tensor %onnx__MatMul_20034 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20035 = util.global.load @onnx__MatMul_20035 : tensor<12288x3072xbf16> %822 = torch_c.from_builtin_tensor %onnx__MatMul_20035 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20036 = util.global.load @onnx__MatMul_20036 : tensor<3072x12288xbf16> %823 = torch_c.from_builtin_tensor %onnx__MatMul_20036 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20037 = util.global.load @onnx__MatMul_20037 : tensor<12288x3072xbf16> %824 = torch_c.from_builtin_tensor %onnx__MatMul_20037 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20038 = util.global.load @onnx__MatMul_20038 : tensor<3072x3072xbf16> %825 = torch_c.from_builtin_tensor %onnx__MatMul_20038 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20039 = util.global.load @onnx__MatMul_20039 : tensor<3072x3072xbf16> %826 = torch_c.from_builtin_tensor %onnx__MatMul_20039 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20040 = util.global.load @onnx__MatMul_20040 : tensor<3072x3072xbf16> %827 = torch_c.from_builtin_tensor %onnx__MatMul_20040 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20053 = util.global.load @onnx__MatMul_20053 : tensor<3072x3072xbf16> %828 = torch_c.from_builtin_tensor 
%onnx__MatMul_20053 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20054 = util.global.load @onnx__MatMul_20054 : tensor<3072x3072xbf16> %829 = torch_c.from_builtin_tensor %onnx__MatMul_20054 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20055 = util.global.load @onnx__MatMul_20055 : tensor<3072x3072xbf16> %830 = torch_c.from_builtin_tensor %onnx__MatMul_20055 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20067 = util.global.load @onnx__MatMul_20067 : tensor<3072x3072xbf16> %831 = torch_c.from_builtin_tensor %onnx__MatMul_20067 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20068 = util.global.load @onnx__MatMul_20068 : tensor<3072x3072xbf16> %832 = torch_c.from_builtin_tensor %onnx__MatMul_20068 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20069 = util.global.load @onnx__MatMul_20069 : tensor<3072x12288xbf16> %833 = torch_c.from_builtin_tensor %onnx__MatMul_20069 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20070 = util.global.load @onnx__MatMul_20070 : tensor<12288x3072xbf16> %834 = torch_c.from_builtin_tensor %onnx__MatMul_20070 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20071 = util.global.load @onnx__MatMul_20071 : tensor<3072x12288xbf16> %835 = torch_c.from_builtin_tensor %onnx__MatMul_20071 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20072 = util.global.load @onnx__MatMul_20072 : tensor<12288x3072xbf16> %836 = torch_c.from_builtin_tensor %onnx__MatMul_20072 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20073 = util.global.load @onnx__MatMul_20073 : tensor<3072x3072xbf16> %837 = torch_c.from_builtin_tensor %onnx__MatMul_20073 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20074 = util.global.load @onnx__MatMul_20074 : tensor<3072x3072xbf16> %838 = 
torch_c.from_builtin_tensor %onnx__MatMul_20074 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20075 = util.global.load @onnx__MatMul_20075 : tensor<3072x3072xbf16> %839 = torch_c.from_builtin_tensor %onnx__MatMul_20075 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20088 = util.global.load @onnx__MatMul_20088 : tensor<3072x3072xbf16> %840 = torch_c.from_builtin_tensor %onnx__MatMul_20088 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20089 = util.global.load @onnx__MatMul_20089 : tensor<3072x3072xbf16> %841 = torch_c.from_builtin_tensor %onnx__MatMul_20089 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20090 = util.global.load @onnx__MatMul_20090 : tensor<3072x3072xbf16> %842 = torch_c.from_builtin_tensor %onnx__MatMul_20090 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20102 = util.global.load @onnx__MatMul_20102 : tensor<3072x3072xbf16> %843 = torch_c.from_builtin_tensor %onnx__MatMul_20102 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20103 = util.global.load @onnx__MatMul_20103 : tensor<3072x3072xbf16> %844 = torch_c.from_builtin_tensor %onnx__MatMul_20103 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20104 = util.global.load @onnx__MatMul_20104 : tensor<3072x12288xbf16> %845 = torch_c.from_builtin_tensor %onnx__MatMul_20104 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20105 = util.global.load @onnx__MatMul_20105 : tensor<12288x3072xbf16> %846 = torch_c.from_builtin_tensor %onnx__MatMul_20105 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20106 = util.global.load @onnx__MatMul_20106 : tensor<3072x12288xbf16> %847 = torch_c.from_builtin_tensor %onnx__MatMul_20106 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20107 = util.global.load @onnx__MatMul_20107 : 
tensor<12288x3072xbf16> %848 = torch_c.from_builtin_tensor %onnx__MatMul_20107 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20108 = util.global.load @onnx__MatMul_20108 : tensor<3072x3072xbf16> %849 = torch_c.from_builtin_tensor %onnx__MatMul_20108 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20109 = util.global.load @onnx__MatMul_20109 : tensor<3072x3072xbf16> %850 = torch_c.from_builtin_tensor %onnx__MatMul_20109 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20110 = util.global.load @onnx__MatMul_20110 : tensor<3072x3072xbf16> %851 = torch_c.from_builtin_tensor %onnx__MatMul_20110 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20123 = util.global.load @onnx__MatMul_20123 : tensor<3072x3072xbf16> %852 = torch_c.from_builtin_tensor %onnx__MatMul_20123 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20124 = util.global.load @onnx__MatMul_20124 : tensor<3072x3072xbf16> %853 = torch_c.from_builtin_tensor %onnx__MatMul_20124 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20125 = util.global.load @onnx__MatMul_20125 : tensor<3072x3072xbf16> %854 = torch_c.from_builtin_tensor %onnx__MatMul_20125 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20137 = util.global.load @onnx__MatMul_20137 : tensor<3072x3072xbf16> %855 = torch_c.from_builtin_tensor %onnx__MatMul_20137 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20138 = util.global.load @onnx__MatMul_20138 : tensor<3072x3072xbf16> %856 = torch_c.from_builtin_tensor %onnx__MatMul_20138 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20139 = util.global.load @onnx__MatMul_20139 : tensor<3072x12288xbf16> %857 = torch_c.from_builtin_tensor %onnx__MatMul_20139 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20140 = util.global.load 
@onnx__MatMul_20140 : tensor<12288x3072xbf16> %858 = torch_c.from_builtin_tensor %onnx__MatMul_20140 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20141 = util.global.load @onnx__MatMul_20141 : tensor<3072x12288xbf16> %859 = torch_c.from_builtin_tensor %onnx__MatMul_20141 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20142 = util.global.load @onnx__MatMul_20142 : tensor<12288x3072xbf16> %860 = torch_c.from_builtin_tensor %onnx__MatMul_20142 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20143 = util.global.load @onnx__MatMul_20143 : tensor<3072x3072xbf16> %861 = torch_c.from_builtin_tensor %onnx__MatMul_20143 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20144 = util.global.load @onnx__MatMul_20144 : tensor<3072x3072xbf16> %862 = torch_c.from_builtin_tensor %onnx__MatMul_20144 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20145 = util.global.load @onnx__MatMul_20145 : tensor<3072x3072xbf16> %863 = torch_c.from_builtin_tensor %onnx__MatMul_20145 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20158 = util.global.load @onnx__MatMul_20158 : tensor<3072x3072xbf16> %864 = torch_c.from_builtin_tensor %onnx__MatMul_20158 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20159 = util.global.load @onnx__MatMul_20159 : tensor<3072x3072xbf16> %865 = torch_c.from_builtin_tensor %onnx__MatMul_20159 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20160 = util.global.load @onnx__MatMul_20160 : tensor<3072x3072xbf16> %866 = torch_c.from_builtin_tensor %onnx__MatMul_20160 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20172 = util.global.load @onnx__MatMul_20172 : tensor<3072x3072xbf16> %867 = torch_c.from_builtin_tensor %onnx__MatMul_20172 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20173 = 
util.global.load @onnx__MatMul_20173 : tensor<3072x3072xbf16> %868 = torch_c.from_builtin_tensor %onnx__MatMul_20173 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20174 = util.global.load @onnx__MatMul_20174 : tensor<3072x12288xbf16> %869 = torch_c.from_builtin_tensor %onnx__MatMul_20174 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20175 = util.global.load @onnx__MatMul_20175 : tensor<12288x3072xbf16> %870 = torch_c.from_builtin_tensor %onnx__MatMul_20175 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20176 = util.global.load @onnx__MatMul_20176 : tensor<3072x12288xbf16> %871 = torch_c.from_builtin_tensor %onnx__MatMul_20176 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20177 = util.global.load @onnx__MatMul_20177 : tensor<12288x3072xbf16> %872 = torch_c.from_builtin_tensor %onnx__MatMul_20177 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20178 = util.global.load @onnx__MatMul_20178 : tensor<3072x3072xbf16> %873 = torch_c.from_builtin_tensor %onnx__MatMul_20178 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20179 = util.global.load @onnx__MatMul_20179 : tensor<3072x3072xbf16> %874 = torch_c.from_builtin_tensor %onnx__MatMul_20179 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20180 = util.global.load @onnx__MatMul_20180 : tensor<3072x3072xbf16> %875 = torch_c.from_builtin_tensor %onnx__MatMul_20180 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20193 = util.global.load @onnx__MatMul_20193 : tensor<3072x3072xbf16> %876 = torch_c.from_builtin_tensor %onnx__MatMul_20193 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20194 = util.global.load @onnx__MatMul_20194 : tensor<3072x3072xbf16> %877 = torch_c.from_builtin_tensor %onnx__MatMul_20194 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> 
%onnx__MatMul_20195 = util.global.load @onnx__MatMul_20195 : tensor<3072x3072xbf16> %878 = torch_c.from_builtin_tensor %onnx__MatMul_20195 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20207 = util.global.load @onnx__MatMul_20207 : tensor<3072x3072xbf16> %879 = torch_c.from_builtin_tensor %onnx__MatMul_20207 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20208 = util.global.load @onnx__MatMul_20208 : tensor<3072x3072xbf16> %880 = torch_c.from_builtin_tensor %onnx__MatMul_20208 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20209 = util.global.load @onnx__MatMul_20209 : tensor<3072x12288xbf16> %881 = torch_c.from_builtin_tensor %onnx__MatMul_20209 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20210 = util.global.load @onnx__MatMul_20210 : tensor<12288x3072xbf16> %882 = torch_c.from_builtin_tensor %onnx__MatMul_20210 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20211 = util.global.load @onnx__MatMul_20211 : tensor<3072x12288xbf16> %883 = torch_c.from_builtin_tensor %onnx__MatMul_20211 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20212 = util.global.load @onnx__MatMul_20212 : tensor<12288x3072xbf16> %884 = torch_c.from_builtin_tensor %onnx__MatMul_20212 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20213 = util.global.load @onnx__MatMul_20213 : tensor<3072x3072xbf16> %885 = torch_c.from_builtin_tensor %onnx__MatMul_20213 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20214 = util.global.load @onnx__MatMul_20214 : tensor<3072x3072xbf16> %886 = torch_c.from_builtin_tensor %onnx__MatMul_20214 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20215 = util.global.load @onnx__MatMul_20215 : tensor<3072x3072xbf16> %887 = torch_c.from_builtin_tensor %onnx__MatMul_20215 : tensor<3072x3072xbf16> -> 
!torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20228 = util.global.load @onnx__MatMul_20228 : tensor<3072x3072xbf16> %888 = torch_c.from_builtin_tensor %onnx__MatMul_20228 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20229 = util.global.load @onnx__MatMul_20229 : tensor<3072x3072xbf16> %889 = torch_c.from_builtin_tensor %onnx__MatMul_20229 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20230 = util.global.load @onnx__MatMul_20230 : tensor<3072x3072xbf16> %890 = torch_c.from_builtin_tensor %onnx__MatMul_20230 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20242 = util.global.load @onnx__MatMul_20242 : tensor<3072x3072xbf16> %891 = torch_c.from_builtin_tensor %onnx__MatMul_20242 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20243 = util.global.load @onnx__MatMul_20243 : tensor<3072x3072xbf16> %892 = torch_c.from_builtin_tensor %onnx__MatMul_20243 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20244 = util.global.load @onnx__MatMul_20244 : tensor<3072x12288xbf16> %893 = torch_c.from_builtin_tensor %onnx__MatMul_20244 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20245 = util.global.load @onnx__MatMul_20245 : tensor<12288x3072xbf16> %894 = torch_c.from_builtin_tensor %onnx__MatMul_20245 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20246 = util.global.load @onnx__MatMul_20246 : tensor<3072x12288xbf16> %895 = torch_c.from_builtin_tensor %onnx__MatMul_20246 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20247 = util.global.load @onnx__MatMul_20247 : tensor<12288x3072xbf16> %896 = torch_c.from_builtin_tensor %onnx__MatMul_20247 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20248 = util.global.load @onnx__MatMul_20248 : tensor<3072x3072xbf16> %897 = torch_c.from_builtin_tensor %onnx__MatMul_20248 : 
tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20249 = util.global.load @onnx__MatMul_20249 : tensor<3072x3072xbf16> %898 = torch_c.from_builtin_tensor %onnx__MatMul_20249 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20250 = util.global.load @onnx__MatMul_20250 : tensor<3072x3072xbf16> %899 = torch_c.from_builtin_tensor %onnx__MatMul_20250 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20263 = util.global.load @onnx__MatMul_20263 : tensor<3072x3072xbf16> %900 = torch_c.from_builtin_tensor %onnx__MatMul_20263 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20264 = util.global.load @onnx__MatMul_20264 : tensor<3072x3072xbf16> %901 = torch_c.from_builtin_tensor %onnx__MatMul_20264 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20265 = util.global.load @onnx__MatMul_20265 : tensor<3072x3072xbf16> %902 = torch_c.from_builtin_tensor %onnx__MatMul_20265 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20277 = util.global.load @onnx__MatMul_20277 : tensor<3072x3072xbf16> %903 = torch_c.from_builtin_tensor %onnx__MatMul_20277 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20278 = util.global.load @onnx__MatMul_20278 : tensor<3072x3072xbf16> %904 = torch_c.from_builtin_tensor %onnx__MatMul_20278 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20279 = util.global.load @onnx__MatMul_20279 : tensor<3072x12288xbf16> %905 = torch_c.from_builtin_tensor %onnx__MatMul_20279 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20280 = util.global.load @onnx__MatMul_20280 : tensor<12288x3072xbf16> %906 = torch_c.from_builtin_tensor %onnx__MatMul_20280 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20281 = util.global.load @onnx__MatMul_20281 : tensor<3072x12288xbf16> %907 = torch_c.from_builtin_tensor 
%onnx__MatMul_20281 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20282 = util.global.load @onnx__MatMul_20282 : tensor<12288x3072xbf16> %908 = torch_c.from_builtin_tensor %onnx__MatMul_20282 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20283 = util.global.load @onnx__MatMul_20283 : tensor<3072x3072xbf16> %909 = torch_c.from_builtin_tensor %onnx__MatMul_20283 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20284 = util.global.load @onnx__MatMul_20284 : tensor<3072x3072xbf16> %910 = torch_c.from_builtin_tensor %onnx__MatMul_20284 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20285 = util.global.load @onnx__MatMul_20285 : tensor<3072x3072xbf16> %911 = torch_c.from_builtin_tensor %onnx__MatMul_20285 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20298 = util.global.load @onnx__MatMul_20298 : tensor<3072x3072xbf16> %912 = torch_c.from_builtin_tensor %onnx__MatMul_20298 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20299 = util.global.load @onnx__MatMul_20299 : tensor<3072x3072xbf16> %913 = torch_c.from_builtin_tensor %onnx__MatMul_20299 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20300 = util.global.load @onnx__MatMul_20300 : tensor<3072x3072xbf16> %914 = torch_c.from_builtin_tensor %onnx__MatMul_20300 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20312 = util.global.load @onnx__MatMul_20312 : tensor<3072x3072xbf16> %915 = torch_c.from_builtin_tensor %onnx__MatMul_20312 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20313 = util.global.load @onnx__MatMul_20313 : tensor<3072x3072xbf16> %916 = torch_c.from_builtin_tensor %onnx__MatMul_20313 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20314 = util.global.load @onnx__MatMul_20314 : tensor<3072x12288xbf16> %917 = 
torch_c.from_builtin_tensor %onnx__MatMul_20314 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20315 = util.global.load @onnx__MatMul_20315 : tensor<12288x3072xbf16> %918 = torch_c.from_builtin_tensor %onnx__MatMul_20315 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20316 = util.global.load @onnx__MatMul_20316 : tensor<3072x12288xbf16> %919 = torch_c.from_builtin_tensor %onnx__MatMul_20316 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20317 = util.global.load @onnx__MatMul_20317 : tensor<12288x3072xbf16> %920 = torch_c.from_builtin_tensor %onnx__MatMul_20317 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20318 = util.global.load @onnx__MatMul_20318 : tensor<3072x3072xbf16> %921 = torch_c.from_builtin_tensor %onnx__MatMul_20318 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20319 = util.global.load @onnx__MatMul_20319 : tensor<3072x3072xbf16> %922 = torch_c.from_builtin_tensor %onnx__MatMul_20319 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20320 = util.global.load @onnx__MatMul_20320 : tensor<3072x3072xbf16> %923 = torch_c.from_builtin_tensor %onnx__MatMul_20320 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20333 = util.global.load @onnx__MatMul_20333 : tensor<3072x3072xbf16> %924 = torch_c.from_builtin_tensor %onnx__MatMul_20333 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20334 = util.global.load @onnx__MatMul_20334 : tensor<3072x3072xbf16> %925 = torch_c.from_builtin_tensor %onnx__MatMul_20334 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20335 = util.global.load @onnx__MatMul_20335 : tensor<3072x3072xbf16> %926 = torch_c.from_builtin_tensor %onnx__MatMul_20335 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20347 = util.global.load @onnx__MatMul_20347 : 
tensor<3072x3072xbf16> %927 = torch_c.from_builtin_tensor %onnx__MatMul_20347 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20348 = util.global.load @onnx__MatMul_20348 : tensor<3072x3072xbf16> %928 = torch_c.from_builtin_tensor %onnx__MatMul_20348 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20349 = util.global.load @onnx__MatMul_20349 : tensor<3072x12288xbf16> %929 = torch_c.from_builtin_tensor %onnx__MatMul_20349 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20350 = util.global.load @onnx__MatMul_20350 : tensor<12288x3072xbf16> %930 = torch_c.from_builtin_tensor %onnx__MatMul_20350 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20351 = util.global.load @onnx__MatMul_20351 : tensor<3072x12288xbf16> %931 = torch_c.from_builtin_tensor %onnx__MatMul_20351 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20352 = util.global.load @onnx__MatMul_20352 : tensor<12288x3072xbf16> %932 = torch_c.from_builtin_tensor %onnx__MatMul_20352 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20353 = util.global.load @onnx__MatMul_20353 : tensor<3072x3072xbf16> %933 = torch_c.from_builtin_tensor %onnx__MatMul_20353 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20354 = util.global.load @onnx__MatMul_20354 : tensor<3072x3072xbf16> %934 = torch_c.from_builtin_tensor %onnx__MatMul_20354 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20355 = util.global.load @onnx__MatMul_20355 : tensor<3072x3072xbf16> %935 = torch_c.from_builtin_tensor %onnx__MatMul_20355 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20368 = util.global.load @onnx__MatMul_20368 : tensor<3072x3072xbf16> %936 = torch_c.from_builtin_tensor %onnx__MatMul_20368 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20369 = util.global.load 
@onnx__MatMul_20369 : tensor<3072x3072xbf16> %937 = torch_c.from_builtin_tensor %onnx__MatMul_20369 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20370 = util.global.load @onnx__MatMul_20370 : tensor<3072x3072xbf16> %938 = torch_c.from_builtin_tensor %onnx__MatMul_20370 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20382 = util.global.load @onnx__MatMul_20382 : tensor<3072x3072xbf16> %939 = torch_c.from_builtin_tensor %onnx__MatMul_20382 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20383 = util.global.load @onnx__MatMul_20383 : tensor<3072x3072xbf16> %940 = torch_c.from_builtin_tensor %onnx__MatMul_20383 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20384 = util.global.load @onnx__MatMul_20384 : tensor<3072x12288xbf16> %941 = torch_c.from_builtin_tensor %onnx__MatMul_20384 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20385 = util.global.load @onnx__MatMul_20385 : tensor<12288x3072xbf16> %942 = torch_c.from_builtin_tensor %onnx__MatMul_20385 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20386 = util.global.load @onnx__MatMul_20386 : tensor<3072x12288xbf16> %943 = torch_c.from_builtin_tensor %onnx__MatMul_20386 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20387 = util.global.load @onnx__MatMul_20387 : tensor<12288x3072xbf16> %944 = torch_c.from_builtin_tensor %onnx__MatMul_20387 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20388 = util.global.load @onnx__MatMul_20388 : tensor<3072x3072xbf16> %945 = torch_c.from_builtin_tensor %onnx__MatMul_20388 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20389 = util.global.load @onnx__MatMul_20389 : tensor<3072x3072xbf16> %946 = torch_c.from_builtin_tensor %onnx__MatMul_20389 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20390 = 
util.global.load @onnx__MatMul_20390 : tensor<3072x3072xbf16> %947 = torch_c.from_builtin_tensor %onnx__MatMul_20390 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20403 = util.global.load @onnx__MatMul_20403 : tensor<3072x3072xbf16> %948 = torch_c.from_builtin_tensor %onnx__MatMul_20403 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20404 = util.global.load @onnx__MatMul_20404 : tensor<3072x3072xbf16> %949 = torch_c.from_builtin_tensor %onnx__MatMul_20404 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20405 = util.global.load @onnx__MatMul_20405 : tensor<3072x3072xbf16> %950 = torch_c.from_builtin_tensor %onnx__MatMul_20405 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20417 = util.global.load @onnx__MatMul_20417 : tensor<3072x3072xbf16> %951 = torch_c.from_builtin_tensor %onnx__MatMul_20417 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20418 = util.global.load @onnx__MatMul_20418 : tensor<3072x3072xbf16> %952 = torch_c.from_builtin_tensor %onnx__MatMul_20418 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20419 = util.global.load @onnx__MatMul_20419 : tensor<3072x12288xbf16> %953 = torch_c.from_builtin_tensor %onnx__MatMul_20419 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20420 = util.global.load @onnx__MatMul_20420 : tensor<12288x3072xbf16> %954 = torch_c.from_builtin_tensor %onnx__MatMul_20420 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20421 = util.global.load @onnx__MatMul_20421 : tensor<3072x12288xbf16> %955 = torch_c.from_builtin_tensor %onnx__MatMul_20421 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20422 = util.global.load @onnx__MatMul_20422 : tensor<12288x3072xbf16> %956 = torch_c.from_builtin_tensor %onnx__MatMul_20422 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> 
%onnx__MatMul_20423 = util.global.load @onnx__MatMul_20423 : tensor<3072x3072xbf16> %957 = torch_c.from_builtin_tensor %onnx__MatMul_20423 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20424 = util.global.load @onnx__MatMul_20424 : tensor<3072x3072xbf16> %958 = torch_c.from_builtin_tensor %onnx__MatMul_20424 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20425 = util.global.load @onnx__MatMul_20425 : tensor<3072x3072xbf16> %959 = torch_c.from_builtin_tensor %onnx__MatMul_20425 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20438 = util.global.load @onnx__MatMul_20438 : tensor<3072x3072xbf16> %960 = torch_c.from_builtin_tensor %onnx__MatMul_20438 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20439 = util.global.load @onnx__MatMul_20439 : tensor<3072x3072xbf16> %961 = torch_c.from_builtin_tensor %onnx__MatMul_20439 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20440 = util.global.load @onnx__MatMul_20440 : tensor<3072x3072xbf16> %962 = torch_c.from_builtin_tensor %onnx__MatMul_20440 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20452 = util.global.load @onnx__MatMul_20452 : tensor<3072x3072xbf16> %963 = torch_c.from_builtin_tensor %onnx__MatMul_20452 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20453 = util.global.load @onnx__MatMul_20453 : tensor<3072x3072xbf16> %964 = torch_c.from_builtin_tensor %onnx__MatMul_20453 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20454 = util.global.load @onnx__MatMul_20454 : tensor<3072x12288xbf16> %965 = torch_c.from_builtin_tensor %onnx__MatMul_20454 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20455 = util.global.load @onnx__MatMul_20455 : tensor<12288x3072xbf16> %966 = torch_c.from_builtin_tensor %onnx__MatMul_20455 : tensor<12288x3072xbf16> -> 
!torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20456 = util.global.load @onnx__MatMul_20456 : tensor<3072x12288xbf16> %967 = torch_c.from_builtin_tensor %onnx__MatMul_20456 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20457 = util.global.load @onnx__MatMul_20457 : tensor<12288x3072xbf16> %968 = torch_c.from_builtin_tensor %onnx__MatMul_20457 : tensor<12288x3072xbf16> -> !torch.vtensor<[12288,3072],bf16> %onnx__MatMul_20458 = util.global.load @onnx__MatMul_20458 : tensor<3072x12288xbf16> %969 = torch_c.from_builtin_tensor %onnx__MatMul_20458 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20459 = util.global.load @onnx__MatMul_20459 : tensor<3072x3072xbf16> %970 = torch_c.from_builtin_tensor %onnx__MatMul_20459 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20460 = util.global.load @onnx__MatMul_20460 : tensor<3072x3072xbf16> %971 = torch_c.from_builtin_tensor %onnx__MatMul_20460 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20461 = util.global.load @onnx__MatMul_20461 : tensor<3072x3072xbf16> %972 = torch_c.from_builtin_tensor %onnx__MatMul_20461 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20473 = util.global.load @onnx__MatMul_20473 : tensor<15360x3072xbf16> %973 = torch_c.from_builtin_tensor %onnx__MatMul_20473 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20474 = util.global.load @onnx__MatMul_20474 : tensor<3072x12288xbf16> %974 = torch_c.from_builtin_tensor %onnx__MatMul_20474 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20475 = util.global.load @onnx__MatMul_20475 : tensor<3072x3072xbf16> %975 = torch_c.from_builtin_tensor %onnx__MatMul_20475 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20476 = util.global.load @onnx__MatMul_20476 : tensor<3072x3072xbf16> %976 = torch_c.from_builtin_tensor %onnx__MatMul_20476 : 
tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20477 = util.global.load @onnx__MatMul_20477 : tensor<3072x3072xbf16> %977 = torch_c.from_builtin_tensor %onnx__MatMul_20477 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20489 = util.global.load @onnx__MatMul_20489 : tensor<15360x3072xbf16> %978 = torch_c.from_builtin_tensor %onnx__MatMul_20489 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20490 = util.global.load @onnx__MatMul_20490 : tensor<3072x12288xbf16> %979 = torch_c.from_builtin_tensor %onnx__MatMul_20490 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20491 = util.global.load @onnx__MatMul_20491 : tensor<3072x3072xbf16> %980 = torch_c.from_builtin_tensor %onnx__MatMul_20491 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20492 = util.global.load @onnx__MatMul_20492 : tensor<3072x3072xbf16> %981 = torch_c.from_builtin_tensor %onnx__MatMul_20492 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20493 = util.global.load @onnx__MatMul_20493 : tensor<3072x3072xbf16> %982 = torch_c.from_builtin_tensor %onnx__MatMul_20493 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20505 = util.global.load @onnx__MatMul_20505 : tensor<15360x3072xbf16> %983 = torch_c.from_builtin_tensor %onnx__MatMul_20505 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20506 = util.global.load @onnx__MatMul_20506 : tensor<3072x12288xbf16> %984 = torch_c.from_builtin_tensor %onnx__MatMul_20506 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20507 = util.global.load @onnx__MatMul_20507 : tensor<3072x3072xbf16> %985 = torch_c.from_builtin_tensor %onnx__MatMul_20507 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20508 = util.global.load @onnx__MatMul_20508 : tensor<3072x3072xbf16> %986 = torch_c.from_builtin_tensor 
%onnx__MatMul_20508 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20509 = util.global.load @onnx__MatMul_20509 : tensor<3072x3072xbf16> %987 = torch_c.from_builtin_tensor %onnx__MatMul_20509 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20521 = util.global.load @onnx__MatMul_20521 : tensor<15360x3072xbf16> %988 = torch_c.from_builtin_tensor %onnx__MatMul_20521 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20522 = util.global.load @onnx__MatMul_20522 : tensor<3072x12288xbf16> %989 = torch_c.from_builtin_tensor %onnx__MatMul_20522 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20523 = util.global.load @onnx__MatMul_20523 : tensor<3072x3072xbf16> %990 = torch_c.from_builtin_tensor %onnx__MatMul_20523 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20524 = util.global.load @onnx__MatMul_20524 : tensor<3072x3072xbf16> %991 = torch_c.from_builtin_tensor %onnx__MatMul_20524 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20525 = util.global.load @onnx__MatMul_20525 : tensor<3072x3072xbf16> %992 = torch_c.from_builtin_tensor %onnx__MatMul_20525 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20537 = util.global.load @onnx__MatMul_20537 : tensor<15360x3072xbf16> %993 = torch_c.from_builtin_tensor %onnx__MatMul_20537 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20538 = util.global.load @onnx__MatMul_20538 : tensor<3072x12288xbf16> %994 = torch_c.from_builtin_tensor %onnx__MatMul_20538 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20539 = util.global.load @onnx__MatMul_20539 : tensor<3072x3072xbf16> %995 = torch_c.from_builtin_tensor %onnx__MatMul_20539 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20540 = util.global.load @onnx__MatMul_20540 : tensor<3072x3072xbf16> %996 = 
torch_c.from_builtin_tensor %onnx__MatMul_20540 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20541 = util.global.load @onnx__MatMul_20541 : tensor<3072x3072xbf16> %997 = torch_c.from_builtin_tensor %onnx__MatMul_20541 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20553 = util.global.load @onnx__MatMul_20553 : tensor<15360x3072xbf16> %998 = torch_c.from_builtin_tensor %onnx__MatMul_20553 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20554 = util.global.load @onnx__MatMul_20554 : tensor<3072x12288xbf16> %999 = torch_c.from_builtin_tensor %onnx__MatMul_20554 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20555 = util.global.load @onnx__MatMul_20555 : tensor<3072x3072xbf16> %1000 = torch_c.from_builtin_tensor %onnx__MatMul_20555 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20556 = util.global.load @onnx__MatMul_20556 : tensor<3072x3072xbf16> %1001 = torch_c.from_builtin_tensor %onnx__MatMul_20556 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20557 = util.global.load @onnx__MatMul_20557 : tensor<3072x3072xbf16> %1002 = torch_c.from_builtin_tensor %onnx__MatMul_20557 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20569 = util.global.load @onnx__MatMul_20569 : tensor<15360x3072xbf16> %1003 = torch_c.from_builtin_tensor %onnx__MatMul_20569 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20570 = util.global.load @onnx__MatMul_20570 : tensor<3072x12288xbf16> %1004 = torch_c.from_builtin_tensor %onnx__MatMul_20570 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20571 = util.global.load @onnx__MatMul_20571 : tensor<3072x3072xbf16> %1005 = torch_c.from_builtin_tensor %onnx__MatMul_20571 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20572 = util.global.load @onnx__MatMul_20572 : 
tensor<3072x3072xbf16> %1006 = torch_c.from_builtin_tensor %onnx__MatMul_20572 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20573 = util.global.load @onnx__MatMul_20573 : tensor<3072x3072xbf16> %1007 = torch_c.from_builtin_tensor %onnx__MatMul_20573 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20585 = util.global.load @onnx__MatMul_20585 : tensor<15360x3072xbf16> %1008 = torch_c.from_builtin_tensor %onnx__MatMul_20585 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20586 = util.global.load @onnx__MatMul_20586 : tensor<3072x12288xbf16> %1009 = torch_c.from_builtin_tensor %onnx__MatMul_20586 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20587 = util.global.load @onnx__MatMul_20587 : tensor<3072x3072xbf16> %1010 = torch_c.from_builtin_tensor %onnx__MatMul_20587 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20588 = util.global.load @onnx__MatMul_20588 : tensor<3072x3072xbf16> %1011 = torch_c.from_builtin_tensor %onnx__MatMul_20588 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20589 = util.global.load @onnx__MatMul_20589 : tensor<3072x3072xbf16> %1012 = torch_c.from_builtin_tensor %onnx__MatMul_20589 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20601 = util.global.load @onnx__MatMul_20601 : tensor<15360x3072xbf16> %1013 = torch_c.from_builtin_tensor %onnx__MatMul_20601 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20602 = util.global.load @onnx__MatMul_20602 : tensor<3072x12288xbf16> %1014 = torch_c.from_builtin_tensor %onnx__MatMul_20602 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20603 = util.global.load @onnx__MatMul_20603 : tensor<3072x3072xbf16> %1015 = torch_c.from_builtin_tensor %onnx__MatMul_20603 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20604 = util.global.load 
@onnx__MatMul_20604 : tensor<3072x3072xbf16> %1016 = torch_c.from_builtin_tensor %onnx__MatMul_20604 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20605 = util.global.load @onnx__MatMul_20605 : tensor<3072x3072xbf16> %1017 = torch_c.from_builtin_tensor %onnx__MatMul_20605 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20617 = util.global.load @onnx__MatMul_20617 : tensor<15360x3072xbf16> %1018 = torch_c.from_builtin_tensor %onnx__MatMul_20617 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20618 = util.global.load @onnx__MatMul_20618 : tensor<3072x12288xbf16> %1019 = torch_c.from_builtin_tensor %onnx__MatMul_20618 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20619 = util.global.load @onnx__MatMul_20619 : tensor<3072x3072xbf16> %1020 = torch_c.from_builtin_tensor %onnx__MatMul_20619 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20620 = util.global.load @onnx__MatMul_20620 : tensor<3072x3072xbf16> %1021 = torch_c.from_builtin_tensor %onnx__MatMul_20620 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20621 = util.global.load @onnx__MatMul_20621 : tensor<3072x3072xbf16> %1022 = torch_c.from_builtin_tensor %onnx__MatMul_20621 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20633 = util.global.load @onnx__MatMul_20633 : tensor<15360x3072xbf16> %1023 = torch_c.from_builtin_tensor %onnx__MatMul_20633 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20634 = util.global.load @onnx__MatMul_20634 : tensor<3072x12288xbf16> %1024 = torch_c.from_builtin_tensor %onnx__MatMul_20634 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20635 = util.global.load @onnx__MatMul_20635 : tensor<3072x3072xbf16> %1025 = torch_c.from_builtin_tensor %onnx__MatMul_20635 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> 
%onnx__MatMul_20636 = util.global.load @onnx__MatMul_20636 : tensor<3072x3072xbf16> %1026 = torch_c.from_builtin_tensor %onnx__MatMul_20636 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20637 = util.global.load @onnx__MatMul_20637 : tensor<3072x3072xbf16> %1027 = torch_c.from_builtin_tensor %onnx__MatMul_20637 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20649 = util.global.load @onnx__MatMul_20649 : tensor<15360x3072xbf16> %1028 = torch_c.from_builtin_tensor %onnx__MatMul_20649 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20650 = util.global.load @onnx__MatMul_20650 : tensor<3072x12288xbf16> %1029 = torch_c.from_builtin_tensor %onnx__MatMul_20650 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20651 = util.global.load @onnx__MatMul_20651 : tensor<3072x3072xbf16> %1030 = torch_c.from_builtin_tensor %onnx__MatMul_20651 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20652 = util.global.load @onnx__MatMul_20652 : tensor<3072x3072xbf16> %1031 = torch_c.from_builtin_tensor %onnx__MatMul_20652 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20653 = util.global.load @onnx__MatMul_20653 : tensor<3072x3072xbf16> %1032 = torch_c.from_builtin_tensor %onnx__MatMul_20653 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20665 = util.global.load @onnx__MatMul_20665 : tensor<15360x3072xbf16> %1033 = torch_c.from_builtin_tensor %onnx__MatMul_20665 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20666 = util.global.load @onnx__MatMul_20666 : tensor<3072x12288xbf16> %1034 = torch_c.from_builtin_tensor %onnx__MatMul_20666 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20667 = util.global.load @onnx__MatMul_20667 : tensor<3072x3072xbf16> %1035 = torch_c.from_builtin_tensor %onnx__MatMul_20667 : tensor<3072x3072xbf16> -> 
!torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20668 = util.global.load @onnx__MatMul_20668 : tensor<3072x3072xbf16> %1036 = torch_c.from_builtin_tensor %onnx__MatMul_20668 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20669 = util.global.load @onnx__MatMul_20669 : tensor<3072x3072xbf16> %1037 = torch_c.from_builtin_tensor %onnx__MatMul_20669 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20681 = util.global.load @onnx__MatMul_20681 : tensor<15360x3072xbf16> %1038 = torch_c.from_builtin_tensor %onnx__MatMul_20681 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20682 = util.global.load @onnx__MatMul_20682 : tensor<3072x12288xbf16> %1039 = torch_c.from_builtin_tensor %onnx__MatMul_20682 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20683 = util.global.load @onnx__MatMul_20683 : tensor<3072x3072xbf16> %1040 = torch_c.from_builtin_tensor %onnx__MatMul_20683 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20684 = util.global.load @onnx__MatMul_20684 : tensor<3072x3072xbf16> %1041 = torch_c.from_builtin_tensor %onnx__MatMul_20684 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20685 = util.global.load @onnx__MatMul_20685 : tensor<3072x3072xbf16> %1042 = torch_c.from_builtin_tensor %onnx__MatMul_20685 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20697 = util.global.load @onnx__MatMul_20697 : tensor<15360x3072xbf16> %1043 = torch_c.from_builtin_tensor %onnx__MatMul_20697 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20698 = util.global.load @onnx__MatMul_20698 : tensor<3072x12288xbf16> %1044 = torch_c.from_builtin_tensor %onnx__MatMul_20698 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20699 = util.global.load @onnx__MatMul_20699 : tensor<3072x3072xbf16> %1045 = torch_c.from_builtin_tensor %onnx__MatMul_20699 : 
tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20700 = util.global.load @onnx__MatMul_20700 : tensor<3072x3072xbf16> %1046 = torch_c.from_builtin_tensor %onnx__MatMul_20700 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20701 = util.global.load @onnx__MatMul_20701 : tensor<3072x3072xbf16> %1047 = torch_c.from_builtin_tensor %onnx__MatMul_20701 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20713 = util.global.load @onnx__MatMul_20713 : tensor<15360x3072xbf16> %1048 = torch_c.from_builtin_tensor %onnx__MatMul_20713 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20714 = util.global.load @onnx__MatMul_20714 : tensor<3072x12288xbf16> %1049 = torch_c.from_builtin_tensor %onnx__MatMul_20714 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20715 = util.global.load @onnx__MatMul_20715 : tensor<3072x3072xbf16> %1050 = torch_c.from_builtin_tensor %onnx__MatMul_20715 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20716 = util.global.load @onnx__MatMul_20716 : tensor<3072x3072xbf16> %1051 = torch_c.from_builtin_tensor %onnx__MatMul_20716 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20717 = util.global.load @onnx__MatMul_20717 : tensor<3072x3072xbf16> %1052 = torch_c.from_builtin_tensor %onnx__MatMul_20717 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20729 = util.global.load @onnx__MatMul_20729 : tensor<15360x3072xbf16> %1053 = torch_c.from_builtin_tensor %onnx__MatMul_20729 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20730 = util.global.load @onnx__MatMul_20730 : tensor<3072x12288xbf16> %1054 = torch_c.from_builtin_tensor %onnx__MatMul_20730 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20731 = util.global.load @onnx__MatMul_20731 : tensor<3072x3072xbf16> %1055 = torch_c.from_builtin_tensor 
%onnx__MatMul_20731 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20732 = util.global.load @onnx__MatMul_20732 : tensor<3072x3072xbf16> %1056 = torch_c.from_builtin_tensor %onnx__MatMul_20732 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20733 = util.global.load @onnx__MatMul_20733 : tensor<3072x3072xbf16> %1057 = torch_c.from_builtin_tensor %onnx__MatMul_20733 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20745 = util.global.load @onnx__MatMul_20745 : tensor<15360x3072xbf16> %1058 = torch_c.from_builtin_tensor %onnx__MatMul_20745 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20746 = util.global.load @onnx__MatMul_20746 : tensor<3072x12288xbf16> %1059 = torch_c.from_builtin_tensor %onnx__MatMul_20746 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20747 = util.global.load @onnx__MatMul_20747 : tensor<3072x3072xbf16> %1060 = torch_c.from_builtin_tensor %onnx__MatMul_20747 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20748 = util.global.load @onnx__MatMul_20748 : tensor<3072x3072xbf16> %1061 = torch_c.from_builtin_tensor %onnx__MatMul_20748 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20749 = util.global.load @onnx__MatMul_20749 : tensor<3072x3072xbf16> %1062 = torch_c.from_builtin_tensor %onnx__MatMul_20749 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20761 = util.global.load @onnx__MatMul_20761 : tensor<15360x3072xbf16> %1063 = torch_c.from_builtin_tensor %onnx__MatMul_20761 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20762 = util.global.load @onnx__MatMul_20762 : tensor<3072x12288xbf16> %1064 = torch_c.from_builtin_tensor %onnx__MatMul_20762 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20763 = util.global.load @onnx__MatMul_20763 : tensor<3072x3072xbf16> %1065 = 
torch_c.from_builtin_tensor %onnx__MatMul_20763 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20764 = util.global.load @onnx__MatMul_20764 : tensor<3072x3072xbf16> %1066 = torch_c.from_builtin_tensor %onnx__MatMul_20764 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20765 = util.global.load @onnx__MatMul_20765 : tensor<3072x3072xbf16> %1067 = torch_c.from_builtin_tensor %onnx__MatMul_20765 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20777 = util.global.load @onnx__MatMul_20777 : tensor<15360x3072xbf16> %1068 = torch_c.from_builtin_tensor %onnx__MatMul_20777 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20778 = util.global.load @onnx__MatMul_20778 : tensor<3072x12288xbf16> %1069 = torch_c.from_builtin_tensor %onnx__MatMul_20778 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20779 = util.global.load @onnx__MatMul_20779 : tensor<3072x3072xbf16> %1070 = torch_c.from_builtin_tensor %onnx__MatMul_20779 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20780 = util.global.load @onnx__MatMul_20780 : tensor<3072x3072xbf16> %1071 = torch_c.from_builtin_tensor %onnx__MatMul_20780 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20781 = util.global.load @onnx__MatMul_20781 : tensor<3072x3072xbf16> %1072 = torch_c.from_builtin_tensor %onnx__MatMul_20781 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20793 = util.global.load @onnx__MatMul_20793 : tensor<15360x3072xbf16> %1073 = torch_c.from_builtin_tensor %onnx__MatMul_20793 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20794 = util.global.load @onnx__MatMul_20794 : tensor<3072x12288xbf16> %1074 = torch_c.from_builtin_tensor %onnx__MatMul_20794 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20795 = util.global.load @onnx__MatMul_20795 : 
tensor<3072x3072xbf16> %1075 = torch_c.from_builtin_tensor %onnx__MatMul_20795 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20796 = util.global.load @onnx__MatMul_20796 : tensor<3072x3072xbf16> %1076 = torch_c.from_builtin_tensor %onnx__MatMul_20796 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20797 = util.global.load @onnx__MatMul_20797 : tensor<3072x3072xbf16> %1077 = torch_c.from_builtin_tensor %onnx__MatMul_20797 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20809 = util.global.load @onnx__MatMul_20809 : tensor<15360x3072xbf16> %1078 = torch_c.from_builtin_tensor %onnx__MatMul_20809 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20810 = util.global.load @onnx__MatMul_20810 : tensor<3072x12288xbf16> %1079 = torch_c.from_builtin_tensor %onnx__MatMul_20810 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20811 = util.global.load @onnx__MatMul_20811 : tensor<3072x3072xbf16> %1080 = torch_c.from_builtin_tensor %onnx__MatMul_20811 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20812 = util.global.load @onnx__MatMul_20812 : tensor<3072x3072xbf16> %1081 = torch_c.from_builtin_tensor %onnx__MatMul_20812 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20813 = util.global.load @onnx__MatMul_20813 : tensor<3072x3072xbf16> %1082 = torch_c.from_builtin_tensor %onnx__MatMul_20813 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20825 = util.global.load @onnx__MatMul_20825 : tensor<15360x3072xbf16> %1083 = torch_c.from_builtin_tensor %onnx__MatMul_20825 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20826 = util.global.load @onnx__MatMul_20826 : tensor<3072x12288xbf16> %1084 = torch_c.from_builtin_tensor %onnx__MatMul_20826 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20827 = util.global.load 
@onnx__MatMul_20827 : tensor<3072x3072xbf16> %1085 = torch_c.from_builtin_tensor %onnx__MatMul_20827 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20828 = util.global.load @onnx__MatMul_20828 : tensor<3072x3072xbf16> %1086 = torch_c.from_builtin_tensor %onnx__MatMul_20828 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20829 = util.global.load @onnx__MatMul_20829 : tensor<3072x3072xbf16> %1087 = torch_c.from_builtin_tensor %onnx__MatMul_20829 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20841 = util.global.load @onnx__MatMul_20841 : tensor<15360x3072xbf16> %1088 = torch_c.from_builtin_tensor %onnx__MatMul_20841 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20842 = util.global.load @onnx__MatMul_20842 : tensor<3072x12288xbf16> %1089 = torch_c.from_builtin_tensor %onnx__MatMul_20842 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20843 = util.global.load @onnx__MatMul_20843 : tensor<3072x3072xbf16> %1090 = torch_c.from_builtin_tensor %onnx__MatMul_20843 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20844 = util.global.load @onnx__MatMul_20844 : tensor<3072x3072xbf16> %1091 = torch_c.from_builtin_tensor %onnx__MatMul_20844 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20845 = util.global.load @onnx__MatMul_20845 : tensor<3072x3072xbf16> %1092 = torch_c.from_builtin_tensor %onnx__MatMul_20845 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20857 = util.global.load @onnx__MatMul_20857 : tensor<15360x3072xbf16> %1093 = torch_c.from_builtin_tensor %onnx__MatMul_20857 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20858 = util.global.load @onnx__MatMul_20858 : tensor<3072x12288xbf16> %1094 = torch_c.from_builtin_tensor %onnx__MatMul_20858 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> 
%onnx__MatMul_20859 = util.global.load @onnx__MatMul_20859 : tensor<3072x3072xbf16> %1095 = torch_c.from_builtin_tensor %onnx__MatMul_20859 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20860 = util.global.load @onnx__MatMul_20860 : tensor<3072x3072xbf16> %1096 = torch_c.from_builtin_tensor %onnx__MatMul_20860 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20861 = util.global.load @onnx__MatMul_20861 : tensor<3072x3072xbf16> %1097 = torch_c.from_builtin_tensor %onnx__MatMul_20861 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20873 = util.global.load @onnx__MatMul_20873 : tensor<15360x3072xbf16> %1098 = torch_c.from_builtin_tensor %onnx__MatMul_20873 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20874 = util.global.load @onnx__MatMul_20874 : tensor<3072x12288xbf16> %1099 = torch_c.from_builtin_tensor %onnx__MatMul_20874 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20875 = util.global.load @onnx__MatMul_20875 : tensor<3072x3072xbf16> %1100 = torch_c.from_builtin_tensor %onnx__MatMul_20875 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20876 = util.global.load @onnx__MatMul_20876 : tensor<3072x3072xbf16> %1101 = torch_c.from_builtin_tensor %onnx__MatMul_20876 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20877 = util.global.load @onnx__MatMul_20877 : tensor<3072x3072xbf16> %1102 = torch_c.from_builtin_tensor %onnx__MatMul_20877 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20889 = util.global.load @onnx__MatMul_20889 : tensor<15360x3072xbf16> %1103 = torch_c.from_builtin_tensor %onnx__MatMul_20889 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20890 = util.global.load @onnx__MatMul_20890 : tensor<3072x12288xbf16> %1104 = torch_c.from_builtin_tensor %onnx__MatMul_20890 : tensor<3072x12288xbf16> -> 
!torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20891 = util.global.load @onnx__MatMul_20891 : tensor<3072x3072xbf16> %1105 = torch_c.from_builtin_tensor %onnx__MatMul_20891 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20892 = util.global.load @onnx__MatMul_20892 : tensor<3072x3072xbf16> %1106 = torch_c.from_builtin_tensor %onnx__MatMul_20892 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20893 = util.global.load @onnx__MatMul_20893 : tensor<3072x3072xbf16> %1107 = torch_c.from_builtin_tensor %onnx__MatMul_20893 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20905 = util.global.load @onnx__MatMul_20905 : tensor<15360x3072xbf16> %1108 = torch_c.from_builtin_tensor %onnx__MatMul_20905 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20906 = util.global.load @onnx__MatMul_20906 : tensor<3072x12288xbf16> %1109 = torch_c.from_builtin_tensor %onnx__MatMul_20906 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20907 = util.global.load @onnx__MatMul_20907 : tensor<3072x3072xbf16> %1110 = torch_c.from_builtin_tensor %onnx__MatMul_20907 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20908 = util.global.load @onnx__MatMul_20908 : tensor<3072x3072xbf16> %1111 = torch_c.from_builtin_tensor %onnx__MatMul_20908 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20909 = util.global.load @onnx__MatMul_20909 : tensor<3072x3072xbf16> %1112 = torch_c.from_builtin_tensor %onnx__MatMul_20909 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20921 = util.global.load @onnx__MatMul_20921 : tensor<15360x3072xbf16> %1113 = torch_c.from_builtin_tensor %onnx__MatMul_20921 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20922 = util.global.load @onnx__MatMul_20922 : tensor<3072x12288xbf16> %1114 = torch_c.from_builtin_tensor %onnx__MatMul_20922 : 
tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20923 = util.global.load @onnx__MatMul_20923 : tensor<3072x3072xbf16> %1115 = torch_c.from_builtin_tensor %onnx__MatMul_20923 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20924 = util.global.load @onnx__MatMul_20924 : tensor<3072x3072xbf16> %1116 = torch_c.from_builtin_tensor %onnx__MatMul_20924 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20925 = util.global.load @onnx__MatMul_20925 : tensor<3072x3072xbf16> %1117 = torch_c.from_builtin_tensor %onnx__MatMul_20925 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20937 = util.global.load @onnx__MatMul_20937 : tensor<15360x3072xbf16> %1118 = torch_c.from_builtin_tensor %onnx__MatMul_20937 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20938 = util.global.load @onnx__MatMul_20938 : tensor<3072x12288xbf16> %1119 = torch_c.from_builtin_tensor %onnx__MatMul_20938 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20939 = util.global.load @onnx__MatMul_20939 : tensor<3072x3072xbf16> %1120 = torch_c.from_builtin_tensor %onnx__MatMul_20939 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20940 = util.global.load @onnx__MatMul_20940 : tensor<3072x3072xbf16> %1121 = torch_c.from_builtin_tensor %onnx__MatMul_20940 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20941 = util.global.load @onnx__MatMul_20941 : tensor<3072x3072xbf16> %1122 = torch_c.from_builtin_tensor %onnx__MatMul_20941 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20953 = util.global.load @onnx__MatMul_20953 : tensor<15360x3072xbf16> %1123 = torch_c.from_builtin_tensor %onnx__MatMul_20953 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20954 = util.global.load @onnx__MatMul_20954 : tensor<3072x12288xbf16> %1124 = torch_c.from_builtin_tensor 
%onnx__MatMul_20954 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20955 = util.global.load @onnx__MatMul_20955 : tensor<3072x3072xbf16> %1125 = torch_c.from_builtin_tensor %onnx__MatMul_20955 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20956 = util.global.load @onnx__MatMul_20956 : tensor<3072x3072xbf16> %1126 = torch_c.from_builtin_tensor %onnx__MatMul_20956 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20957 = util.global.load @onnx__MatMul_20957 : tensor<3072x3072xbf16> %1127 = torch_c.from_builtin_tensor %onnx__MatMul_20957 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20969 = util.global.load @onnx__MatMul_20969 : tensor<15360x3072xbf16> %1128 = torch_c.from_builtin_tensor %onnx__MatMul_20969 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20970 = util.global.load @onnx__MatMul_20970 : tensor<3072x12288xbf16> %1129 = torch_c.from_builtin_tensor %onnx__MatMul_20970 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20971 = util.global.load @onnx__MatMul_20971 : tensor<3072x3072xbf16> %1130 = torch_c.from_builtin_tensor %onnx__MatMul_20971 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20972 = util.global.load @onnx__MatMul_20972 : tensor<3072x3072xbf16> %1131 = torch_c.from_builtin_tensor %onnx__MatMul_20972 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20973 = util.global.load @onnx__MatMul_20973 : tensor<3072x3072xbf16> %1132 = torch_c.from_builtin_tensor %onnx__MatMul_20973 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20985 = util.global.load @onnx__MatMul_20985 : tensor<15360x3072xbf16> %1133 = torch_c.from_builtin_tensor %onnx__MatMul_20985 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_20986 = util.global.load @onnx__MatMul_20986 : tensor<3072x12288xbf16> %1134 = 
torch_c.from_builtin_tensor %onnx__MatMul_20986 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_20987 = util.global.load @onnx__MatMul_20987 : tensor<3072x3072xbf16> %1135 = torch_c.from_builtin_tensor %onnx__MatMul_20987 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20988 = util.global.load @onnx__MatMul_20988 : tensor<3072x3072xbf16> %1136 = torch_c.from_builtin_tensor %onnx__MatMul_20988 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_20989 = util.global.load @onnx__MatMul_20989 : tensor<3072x3072xbf16> %1137 = torch_c.from_builtin_tensor %onnx__MatMul_20989 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21001 = util.global.load @onnx__MatMul_21001 : tensor<15360x3072xbf16> %1138 = torch_c.from_builtin_tensor %onnx__MatMul_21001 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_21002 = util.global.load @onnx__MatMul_21002 : tensor<3072x12288xbf16> %1139 = torch_c.from_builtin_tensor %onnx__MatMul_21002 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_21003 = util.global.load @onnx__MatMul_21003 : tensor<3072x3072xbf16> %1140 = torch_c.from_builtin_tensor %onnx__MatMul_21003 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21004 = util.global.load @onnx__MatMul_21004 : tensor<3072x3072xbf16> %1141 = torch_c.from_builtin_tensor %onnx__MatMul_21004 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21005 = util.global.load @onnx__MatMul_21005 : tensor<3072x3072xbf16> %1142 = torch_c.from_builtin_tensor %onnx__MatMul_21005 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21017 = util.global.load @onnx__MatMul_21017 : tensor<15360x3072xbf16> %1143 = torch_c.from_builtin_tensor %onnx__MatMul_21017 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_21018 = util.global.load @onnx__MatMul_21018 : 
tensor<3072x12288xbf16> %1144 = torch_c.from_builtin_tensor %onnx__MatMul_21018 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_21019 = util.global.load @onnx__MatMul_21019 : tensor<3072x3072xbf16> %1145 = torch_c.from_builtin_tensor %onnx__MatMul_21019 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21020 = util.global.load @onnx__MatMul_21020 : tensor<3072x3072xbf16> %1146 = torch_c.from_builtin_tensor %onnx__MatMul_21020 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21021 = util.global.load @onnx__MatMul_21021 : tensor<3072x3072xbf16> %1147 = torch_c.from_builtin_tensor %onnx__MatMul_21021 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21033 = util.global.load @onnx__MatMul_21033 : tensor<15360x3072xbf16> %1148 = torch_c.from_builtin_tensor %onnx__MatMul_21033 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_21034 = util.global.load @onnx__MatMul_21034 : tensor<3072x12288xbf16> %1149 = torch_c.from_builtin_tensor %onnx__MatMul_21034 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_21035 = util.global.load @onnx__MatMul_21035 : tensor<3072x3072xbf16> %1150 = torch_c.from_builtin_tensor %onnx__MatMul_21035 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21036 = util.global.load @onnx__MatMul_21036 : tensor<3072x3072xbf16> %1151 = torch_c.from_builtin_tensor %onnx__MatMul_21036 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21037 = util.global.load @onnx__MatMul_21037 : tensor<3072x3072xbf16> %1152 = torch_c.from_builtin_tensor %onnx__MatMul_21037 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21049 = util.global.load @onnx__MatMul_21049 : tensor<15360x3072xbf16> %1153 = torch_c.from_builtin_tensor %onnx__MatMul_21049 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_21050 = util.global.load 
@onnx__MatMul_21050 : tensor<3072x12288xbf16> %1154 = torch_c.from_builtin_tensor %onnx__MatMul_21050 : tensor<3072x12288xbf16> -> !torch.vtensor<[3072,12288],bf16> %onnx__MatMul_21051 = util.global.load @onnx__MatMul_21051 : tensor<3072x3072xbf16> %1155 = torch_c.from_builtin_tensor %onnx__MatMul_21051 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21052 = util.global.load @onnx__MatMul_21052 : tensor<3072x3072xbf16> %1156 = torch_c.from_builtin_tensor %onnx__MatMul_21052 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21053 = util.global.load @onnx__MatMul_21053 : tensor<3072x3072xbf16> %1157 = torch_c.from_builtin_tensor %onnx__MatMul_21053 : tensor<3072x3072xbf16> -> !torch.vtensor<[3072,3072],bf16> %onnx__MatMul_21065 = util.global.load @onnx__MatMul_21065 : tensor<15360x3072xbf16> %1158 = torch_c.from_builtin_tensor %onnx__MatMul_21065 : tensor<15360x3072xbf16> -> !torch.vtensor<[15360,3072],bf16> %onnx__MatMul_21069 = util.global.load @onnx__MatMul_21069 : tensor<3072x64xbf16> %1159 = torch_c.from_builtin_tensor %onnx__MatMul_21069 : tensor<3072x64xbf16> -> !torch.vtensor<[3072,64],bf16> %none = torch.constant.none %1160 = torch.operator "onnx.MatMul"(%arg0, %739) : (!torch.vtensor<[1,4096,64],bf16>, !torch.vtensor<[64,3072],bf16>) -> !torch.vtensor<[1,4096,3072],bf16> %1161 = torch.operator "onnx.Add"(%13, %1160) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[1,4096,3072],bf16>) -> !torch.vtensor<[1,4096,3072],bf16> %1162 = torch.operator "onnx.Cast"(%arg3) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %1163 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %1164 = torch.operator "onnx.Mul"(%1162, %1163) : (!torch.vtensor<[1],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[1],bf16> %1165 = torch.operator "onnx.Cast"(%arg6) {torch.onnx.to = 16 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %1166 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %1167 = torch.operator "onnx.Mul"(%1165, %1166) : (!torch.vtensor<[1],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[1],bf16> %1168 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1169 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1170 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1171 = torch.operator "onnx.Unsqueeze"(%1164, %1170) : (!torch.vtensor<[1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,1],bf16> %1172 = torch.operator "onnx.Cast"(%1171) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,1],bf16>) -> !torch.vtensor<[1,1],f32> %_2Ftime_text_embed2Ftime_proj2FConstant_3_attr__value = util.global.load @"/time_text_embed/time_proj/Constant_3_attr__value" : tensor<1x128xf32> %1173 = torch_c.from_builtin_tensor %_2Ftime_text_embed2Ftime_proj2FConstant_3_attr__value : tensor<1x128xf32> -> !torch.vtensor<[1,128],f32> %1174 = torch.operator "onnx.Mul"(%1172, %1173) : (!torch.vtensor<[1,1],f32>, !torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,128],f32> %1175 = torch.operator "onnx.Sin"(%1174) : (!torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,128],f32> %1176 = torch.operator "onnx.Cos"(%1174) : (!torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,128],f32> %1177 = torch.operator "onnx.Concat"(%1175, %1176) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,128],f32>, !torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,256],f32> %1178 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__time_text_embed_time_proj_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1180 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1181 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1182 = torch.operator "onnx.Slice"(%1177, %1179, %1180, %1178, %1181) : (!torch.vtensor<[1,256],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,128],f32> %1183 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1184 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1185 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1186 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1187 = torch.operator "onnx.Slice"(%1177, %1184, %1185, %1183, %1186) : (!torch.vtensor<[1,256],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,128],f32> %1188 = torch.operator "onnx.Concat"(%1182, %1187) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,128],f32>, !torch.vtensor<[1,128],f32>) -> 
!torch.vtensor<[1,256],f32> %1189 = torch.operator "onnx.Cast"(%1188) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,256],f32>) -> !torch.vtensor<[1,256],bf16> %1190 = torch.operator "onnx.Gemm"(%1189, %0, %1) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,256],bf16>, !torch.vtensor<[3072,256],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1191 = torch.operator "onnx.Sigmoid"(%1190) : (!torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1192 = torch.operator "onnx.Mul"(%1190, %1191) : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1193 = torch.operator "onnx.Gemm"(%1192, %2, %3) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[3072,3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1194 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1195 = torch.operator "onnx.Unsqueeze"(%1167, %1194) : (!torch.vtensor<[1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,1],bf16> %1196 = torch.operator "onnx.Cast"(%1195) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,1],bf16>) -> !torch.vtensor<[1,1],f32> %_2Ftime_text_embed2Ftime_proj_12FConstant_1_attr__value = util.global.load @"/time_text_embed/time_proj_1/Constant_1_attr__value" : tensor<1x128xf32> %1197 = torch_c.from_builtin_tensor %_2Ftime_text_embed2Ftime_proj_12FConstant_1_attr__value : tensor<1x128xf32> -> !torch.vtensor<[1,128],f32> %1198 = torch.operator "onnx.Mul"(%1196, %1197) : (!torch.vtensor<[1,1],f32>, !torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,128],f32> %1199 = torch.operator "onnx.Sin"(%1198) : (!torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,128],f32> %1200 = 
torch.operator "onnx.Cos"(%1198) : (!torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,128],f32> %1201 = torch.operator "onnx.Concat"(%1199, %1200) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,128],f32>, !torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,256],f32> %1202 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1203 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1204 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1205 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1206 = torch.operator "onnx.Slice"(%1201, %1203, %1204, %1202, %1205) : (!torch.vtensor<[1,256],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,128],f32> %1207 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1208 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1209 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1210 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__time_text_embed_time_proj_1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1211 = torch.operator "onnx.Slice"(%1201, 
%1208, %1209, %1207, %1210) : (!torch.vtensor<[1,256],f32>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,128],f32> %1212 = torch.operator "onnx.Concat"(%1206, %1211) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,128],f32>, !torch.vtensor<[1,128],f32>) -> !torch.vtensor<[1,256],f32> %1213 = torch.operator "onnx.Cast"(%1212) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,256],f32>) -> !torch.vtensor<[1,256],bf16> %1214 = torch.operator "onnx.Gemm"(%1213, %4, %5) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,256],bf16>, !torch.vtensor<[3072,256],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1215 = torch.operator "onnx.Sigmoid"(%1214) : (!torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1216 = torch.operator "onnx.Mul"(%1214, %1215) : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1217 = torch.operator "onnx.Gemm"(%1216, %6, %7) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[3072,3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1218 = torch.operator "onnx.Add"(%1193, %1217) : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1219 = torch.operator "onnx.Gemm"(%arg2, %8, %9) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,768],bf16>, !torch.vtensor<[3072,768],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1220 = torch.operator "onnx.Sigmoid"(%1219) : (!torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1221 = torch.operator "onnx.Mul"(%1219, %1220) : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[1,3072],bf16>) -> 
!torch.vtensor<[1,3072],bf16> %1222 = torch.operator "onnx.Gemm"(%1221, %10, %11) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[3072,3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1223 = torch.operator "onnx.Add"(%1218, %1222) : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1224 = torch.operator "onnx.MatMul"(%arg1, %740) : (!torch.vtensor<[1,512,4096],bf16>, !torch.vtensor<[4096,3072],bf16>) -> !torch.vtensor<[1,512,3072],bf16> %1225 = torch.operator "onnx.Add"(%12, %1224) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[1,512,3072],bf16>) -> !torch.vtensor<[1,512,3072],bf16> %1226 = torch.operator "onnx.Concat"(%arg5, %arg4) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[512,3],bf16>, !torch.vtensor<[4096,3],bf16>) -> !torch.vtensor<[4608,3],bf16> %1227 = torch.operator "onnx.Cast"(%1226) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,3],bf16>) -> !torch.vtensor<[4608,3],f32> %1228 = torch.operator "onnx.Gather"(%1227, %1168) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,3],f32>, !torch.vtensor<[],si64>) -> !torch.vtensor<[4608],f32> %1229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_attr__value> : tensor<8xf32>} : () -> !torch.vtensor<[8],f32> %1230 = torch.operator "onnx.Einsum"(%1228, %1229) {torch.onnx.equation = "i,j->ij"} : (!torch.vtensor<[4608],f32>, !torch.vtensor<[8],f32>) -> !torch.vtensor<[4608,8],f32> %1231 = torch.operator "onnx.Cos"(%1230) : (!torch.vtensor<[4608,8],f32>) -> !torch.vtensor<[4608,8],f32> %1232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1233 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_2_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %1234 = torch.operator "onnx.Unsqueeze"(%1231, %1233) : (!torch.vtensor<[4608,8],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4608,8,1],f32> %1235 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_3_attr__value> : tensor<3xsi64>} : () -> !torch.vtensor<[3],si64> %1236 = torch.operator "onnx.Tile"(%1234, %1235) : (!torch.vtensor<[4608,8,1],f32>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[4608,8,2],f32> %1237 = torch.operator "onnx.Flatten"(%1236) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,8,2],f32>) -> !torch.vtensor<[4608,16],f32> %1238 = torch.operator "onnx.Cast"(%1237) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,16],f32>) -> !torch.vtensor<[4608,16],f32> %1239 = torch.operator "onnx.Sin"(%1230) : (!torch.vtensor<[4608,8],f32>) -> !torch.vtensor<[4608,8],f32> %1240 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1241 = torch.operator "onnx.Unsqueeze"(%1239, %1240) : (!torch.vtensor<[4608,8],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4608,8,1],f32> %1242 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_5_attr__value> : tensor<3xsi64>} : () -> !torch.vtensor<[3],si64> %1243 = torch.operator "onnx.Tile"(%1241, %1242) : (!torch.vtensor<[4608,8,1],f32>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[4608,8,2],f32> %1244 = torch.operator "onnx.Flatten"(%1243) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,8,2],f32>) -> !torch.vtensor<[4608,16],f32> %1245 = torch.operator "onnx.Cast"(%1244) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,16],f32>) -> !torch.vtensor<[4608,16],f32> %1246 = torch.operator "onnx.Gather"(%1227, %1169) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,3],f32>, !torch.vtensor<[],si64>) -> !torch.vtensor<[4608],f32> %1247 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__pos_embed_Constant_6_attr__value> : tensor<28xf32>} : () -> !torch.vtensor<[28],f32> %1248 = torch.operator "onnx.Einsum"(%1246, %1247) {torch.onnx.equation = "i,j->ij"} : (!torch.vtensor<[4608],f32>, !torch.vtensor<[28],f32>) -> !torch.vtensor<[4608,28],f32> %1249 = torch.operator "onnx.Cos"(%1248) : (!torch.vtensor<[4608,28],f32>) -> !torch.vtensor<[4608,28],f32> %1250 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1251 = torch.operator "onnx.Unsqueeze"(%1249, %1250) : (!torch.vtensor<[4608,28],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4608,28,1],f32> %1252 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_8_attr__value> : tensor<3xsi64>} : () -> !torch.vtensor<[3],si64> %1253 = torch.operator "onnx.Tile"(%1251, %1252) : (!torch.vtensor<[4608,28,1],f32>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[4608,28,2],f32> %1254 = torch.operator "onnx.Flatten"(%1253) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,28,2],f32>) -> !torch.vtensor<[4608,56],f32> %1255 = torch.operator "onnx.Cast"(%1254) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,56],f32>) -> !torch.vtensor<[4608,56],f32> %1256 = torch.operator "onnx.Sin"(%1248) : (!torch.vtensor<[4608,28],f32>) -> !torch.vtensor<[4608,28],f32> %1257 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1258 = torch.operator "onnx.Unsqueeze"(%1256, %1257) : (!torch.vtensor<[4608,28],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4608,28,1],f32> %1259 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_10_attr__value> : tensor<3xsi64>} : () -> !torch.vtensor<[3],si64> %1260 = torch.operator "onnx.Tile"(%1258, %1259) : (!torch.vtensor<[4608,28,1],f32>, !torch.vtensor<[3],si64>) -> 
!torch.vtensor<[4608,28,2],f32> %1261 = torch.operator "onnx.Flatten"(%1260) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,28,2],f32>) -> !torch.vtensor<[4608,56],f32> %1262 = torch.operator "onnx.Cast"(%1261) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,56],f32>) -> !torch.vtensor<[4608,56],f32> %1263 = torch.operator "onnx.Gather"(%1227, %1232) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,3],f32>, !torch.vtensor<[],si64>) -> !torch.vtensor<[4608],f32> %1264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_11_attr__value> : tensor<28xf32>} : () -> !torch.vtensor<[28],f32> %1265 = torch.operator "onnx.Einsum"(%1263, %1264) {torch.onnx.equation = "i,j->ij"} : (!torch.vtensor<[4608],f32>, !torch.vtensor<[28],f32>) -> !torch.vtensor<[4608,28],f32> %1266 = torch.operator "onnx.Cos"(%1265) : (!torch.vtensor<[4608,28],f32>) -> !torch.vtensor<[4608,28],f32> %1267 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1268 = torch.operator "onnx.Unsqueeze"(%1266, %1267) : (!torch.vtensor<[4608,28],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4608,28,1],f32> %1269 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_13_attr__value> : tensor<3xsi64>} : () -> !torch.vtensor<[3],si64> %1270 = torch.operator "onnx.Tile"(%1268, %1269) : (!torch.vtensor<[4608,28,1],f32>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[4608,28,2],f32> %1271 = torch.operator "onnx.Flatten"(%1270) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,28,2],f32>) -> !torch.vtensor<[4608,56],f32> %1272 = torch.operator "onnx.Cast"(%1271) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,56],f32>) -> !torch.vtensor<[4608,56],f32> %1273 = torch.operator "onnx.Sin"(%1265) : (!torch.vtensor<[4608,28],f32>) -> !torch.vtensor<[4608,28],f32> %1274 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__pos_embed_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1275 = torch.operator "onnx.Unsqueeze"(%1273, %1274) : (!torch.vtensor<[4608,28],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4608,28,1],f32> %1276 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__pos_embed_Constant_15_attr__value> : tensor<3xsi64>} : () -> !torch.vtensor<[3],si64> %1277 = torch.operator "onnx.Tile"(%1275, %1276) : (!torch.vtensor<[4608,28,1],f32>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[4608,28,2],f32> %1278 = torch.operator "onnx.Flatten"(%1277) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[4608,28,2],f32>) -> !torch.vtensor<[4608,56],f32> %1279 = torch.operator "onnx.Cast"(%1278) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,56],f32>) -> !torch.vtensor<[4608,56],f32> %1280 = torch.operator "onnx.Concat"(%1238, %1255, %1272) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[4608,16],f32>, !torch.vtensor<[4608,56],f32>, !torch.vtensor<[4608,56],f32>) -> !torch.vtensor<[4608,128],f32> %1281 = torch.operator "onnx.Cast"(%1280) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,128],f32>) -> !torch.vtensor<[4608,128],f32> %1282 = torch.operator "onnx.Concat"(%1245, %1262, %1279) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[4608,16],f32>, !torch.vtensor<[4608,56],f32>, !torch.vtensor<[4608,56],f32>) -> !torch.vtensor<[4608,128],f32> %1283 = torch.operator "onnx.Cast"(%1282) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[4608,128],f32>) -> !torch.vtensor<[4608,128],f32> %1284 = torch.operator "onnx.Sigmoid"(%1223) : (!torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1285 = torch.operator "onnx.Mul"(%1223, %1284) : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %1286 = torch.operator "onnx.Gemm"(%1285, %14, %15) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : 
(!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %1287 = torch.operator "onnx.Shape"(%1286) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %1288 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1289 = torch.operator "onnx.Gather"(%1287, %1288) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1290 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1291 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1292 = torch.operator "onnx.Add"(%1289, %1291) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1293 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1294 = torch.operator "onnx.Div"(%1292, %1293) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1295 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1296 = torch.operator "onnx.Mul"(%1294, %1295) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1297 = torch.operator "onnx.Slice"(%1286, %1290, %1296, %1288) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1298 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.0_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1299 = torch.operator "onnx.Mul"(%1294, %1298) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1300 = torch.operator "onnx.Slice"(%1286, %1296, %1299, %1288) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1301 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1302 = torch.operator "onnx.Mul"(%1294, %1301) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1303 = torch.operator "onnx.Slice"(%1286, %1299, %1302, %1288) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1304 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1305 = torch.operator "onnx.Mul"(%1294, %1304) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1306 = torch.operator "onnx.Slice"(%1286, %1302, %1305, %1288) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1307 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1308 = torch.operator "onnx.Mul"(%1294, %1307) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1309 = torch.operator "onnx.Slice"(%1286, %1305, %1308, %1288) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?],bf16>
// (The line above is the result type of the "onnx.Slice" that starts on the previous line.)
//
// Final slice of %1286: the end bound %1311 is %1294 multiplied by a 1-element
// si64 constant; the slice runs from %1308 to %1311 along the axes given by %1288.
%1310 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1311 = torch.operator "onnx.Mul"(%1294, %1310) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%1312 = torch.operator "onnx.Slice"(%1286, %1308, %1311, %1288) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Load two 3072-element bf16 globals, convert them to torch vtensors, and pass
// them as the 2nd and 3rd operands of an ONNX LayerNormalization of %1161
// (axis = -1, epsilon = 9.99999997E-7).
%_2Ftransformer_blocks.02Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.0/norm1/norm/Constant_attr__value" : tensor<3072xbf16>
%1313 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.02Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.02Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.0/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
%1314 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.02Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%1315 = torch.operator "onnx.LayerNormalization"(%1161, %1313, %1314) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[1,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[1,4096,3072],bf16>
// Insert a new axis into slice %1300 (axes given by %1316), producing [?,1,?].
%1316 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1317 = torch.operator "onnx.Unsqueeze"(%1300, %1316) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 bf16 constant that is added to %1317 below. Its builtin type is
// spelled tensor<bf16>, agreeing with the declared result !torch.vtensor<[],bf16>;
// a bare `tensor` with no element type is not a valid MLIR tensor type.
// NOTE(review): the value lives in the dense_resource blob and is not visible
// here - presumably 1.0 for a "(1 + scale)" modulation; confirm.
%1318 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
// The operand/result types of this Add continue on the next line.
%1319 = torch.operator "onnx.Add"(%1317, %1318) :
(!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %1320 = torch.operator "onnx.Mul"(%1315, %1319) : (!torch.vtensor<[1,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1322 = torch.operator "onnx.Unsqueeze"(%1297, %1321) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1323 = torch.operator "onnx.Add"(%1320, %1322) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1324 = torch.operator "onnx.Gemm"(%1285, %16, %17) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %1325 = torch.operator "onnx.Shape"(%1324) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %1326 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1327 = torch.operator "onnx.Gather"(%1325, %1326) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1328 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1329 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1330 = torch.operator "onnx.Add"(%1327, %1329) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1331 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1332 = torch.operator "onnx.Div"(%1330, %1331) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1333 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1334 = torch.operator "onnx.Mul"(%1332, %1333) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1335 = torch.operator "onnx.Slice"(%1324, %1328, %1334, %1326) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1336 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1337 = torch.operator "onnx.Mul"(%1332, %1336) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1338 = torch.operator "onnx.Slice"(%1324, %1334, %1337, %1326) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1339 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1340 = torch.operator "onnx.Mul"(%1332, %1339) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1341 = torch.operator "onnx.Slice"(%1324, %1337, %1340, %1326) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1342 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.0_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1343 = torch.operator "onnx.Mul"(%1332, %1342) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1344 = torch.operator "onnx.Slice"(%1324, %1340, %1343, %1326) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1345 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1346 = torch.operator "onnx.Mul"(%1332, %1345) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1347 = torch.operator "onnx.Slice"(%1324, %1343, %1346, %1326) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1348 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1349 = torch.operator "onnx.Mul"(%1332, %1348) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1350 = torch.operator "onnx.Slice"(%1324, %1346, %1349, %1326) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.02Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.0/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %1351 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.02Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.02Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.0/norm1_context/norm/Constant_1_attr__value" 
: tensor<3072xbf16>
// (The line above is the type of the util.global.load that starts on the previous line.)
//
// Convert the loaded global to a torch vtensor and use it, together with %1351,
// as the 3rd and 2nd operands of an ONNX LayerNormalization of %1225
// (axis = -1, epsilon = 9.99999997E-7).
%1352 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.02Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%1353 = torch.operator "onnx.LayerNormalization"(%1225, %1351, %1352) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[1,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[1,512,3072],bf16>
// Insert a new axis into slice %1338 (axes given by %1354), producing [?,1,?].
%1354 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1355 = torch.operator "onnx.Unsqueeze"(%1338, %1354) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 bf16 constant added to %1355. Its builtin type is spelled tensor<bf16>,
// agreeing with the declared result !torch.vtensor<[],bf16>; a bare `tensor`
// with no element type is not a valid MLIR tensor type.
// NOTE(review): the value lives in the dense_resource blob and is not visible
// here - presumably 1.0 for a "(1 + scale)" modulation; confirm.
%1356 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1357 = torch.operator "onnx.Add"(%1355, %1356) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
// Multiply the normalized tensor by %1357, then add slice %1335 after giving it
// a new axis (axes from %1359); both operands broadcast against [1,512,3072].
%1358 = torch.operator "onnx.Mul"(%1353, %1357) : (!torch.vtensor<[1,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%1359 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1360 = torch.operator "onnx.Unsqueeze"(%1335, %1359) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%1361 = torch.operator "onnx.Add"(%1358, %1360) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// MatMul of %1323 with the [3072,3072] bf16 operand %741, followed by an Add of
// the [3072] bf16 operand %20 (both defined outside this chunk).
%1362 = torch.operator "onnx.MatMul"(%1323, %741) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// The remaining operand/result types of this Add continue on the next line.
%1363 = torch.operator "onnx.Add"(%20, %1362) : (!torch.vtensor<[3072],bf16>,
!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// (The line above completes the "onnx.Add" that starts on the previous line.)
//
// Two more MatMul + Add pairs on %1323, using the [3072,3072] operands %742/%743
// and the [3072] operands %21/%22 (all defined outside this chunk).
%1364 = torch.operator "onnx.MatMul"(%1323, %742) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%1365 = torch.operator "onnx.Add"(%21, %1364) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%1366 = torch.operator "onnx.MatMul"(%1323, %743) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%1367 = torch.operator "onnx.Add"(%22, %1366) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Six 4-element si64 shape constants consumed by the Reshape ops of this
// attention block (%1368-%1370 here, %1371-%1373 by later Reshapes).
%1368 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_attr__value> : tensor<4xsi64>} : () -> !torch.vtensor<[4],si64>
%1369 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_1_attr__value> : tensor<4xsi64>} : () -> !torch.vtensor<[4],si64>
%1370 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_2_attr__value> : tensor<4xsi64>} : () -> !torch.vtensor<[4],si64>
%1371 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_3_attr__value> : tensor<4xsi64>} : () -> !torch.vtensor<[4],si64>
%1372 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_4_attr__value> : tensor<4xsi64>} : () -> !torch.vtensor<[4],si64>
%1373 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_5_attr__value> : tensor<4xsi64>} : () -> !torch.vtensor<[4],si64>
// Reshape each projection to [1,?,24,128] and swap axes 1 and 2 -> [1,24,?,128].
%1374 = torch.operator "onnx.Reshape"(%1363, %1368) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[1,?,24,128],bf16>
%1375 = torch.operator "onnx.Transpose"(%1374) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[1,?,24,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
%1376 = torch.operator "onnx.Reshape"(%1365, %1369) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[1,?,24,128],bf16>
%1377 = torch.operator "onnx.Transpose"(%1376) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[1,?,24,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
%1378 = torch.operator "onnx.Reshape"(%1367, %1370) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[1,?,24,128],bf16>
%1379 = torch.operator "onnx.Transpose"(%1378) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[1,?,24,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
// Normalization chain on %1375, computed in f32:
//   Cast -> Pow(x, c0) -> ReduceMean(last axis, keepdims) -> Add(c1) -> Sqrt
//   -> Div(c2, .) -> Mul(x, .) -> Cast to ONNX type 16 (bf16) -> Mul by %18.
// The rank-0 constants c0/c1/c2 are typed tensor<f32>, matching their declared
// result type !torch.vtensor<[],f32>; a bare `tensor` is not a valid MLIR type.
// NOTE(review): constant values are in the dense_resource blob and not visible
// here - presumably exponent 2, an epsilon, and 1.0 (RMS-norm shape); confirm.
%1380 = torch.operator "onnx.Cast"(%1375) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32>
%1381 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1382 = torch.operator "onnx.Pow"(%1380, %1381) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,24,?,128],f32>
%1383 = torch.operator "onnx.ReduceMean"(%1382) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[1,24,?,128],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1384 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1385 = torch.operator "onnx.Add"(%1383, %1384) : (!torch.vtensor<[1,24,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1386 = torch.operator "onnx.Sqrt"(%1385) : (!torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1387 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1388 = torch.operator "onnx.Div"(%1387, %1386) : (!torch.vtensor<[],f32>, !torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1389 = torch.operator "onnx.Cast"(%1375) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32>
%1390 = torch.operator "onnx.Mul"(%1389, %1388) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,128],f32>
%1391 = torch.operator "onnx.Cast"(%1390) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,24,?,128],f32>) -> !torch.vtensor<[1,24,?,128],bf16>
%1392 = torch.operator "onnx.Mul"(%1391, %18) : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
// Same chain applied to %1377; the final per-channel multiply (by %19) follows
// after the constant whose op is split across this line and the next.
%1393 = torch.operator "onnx.Cast"(%1377) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32>
%1394 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1395 = torch.operator "onnx.Pow"(%1393, %1394) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,24,?,128],f32>
%1396 = torch.operator "onnx.ReduceMean"(%1395) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[1,24,?,128],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1397 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1398 = torch.operator "onnx.Add"(%1396, %1397) : (!torch.vtensor<[1,24,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1399 = torch.operator "onnx.Sqrt"(%1398) : (!torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1400 = torch.operator "onnx.Constant"()
{torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// (The line above completes the "onnx.Constant" %1400 that starts on the
// previous line. Its builtin type is spelled tensor<f32>, matching the declared
// result !torch.vtensor<[],f32>; a bare `tensor` is not a valid MLIR type.)
//
// Tail of the normalization chain on %1377: divide the constant by the Sqrt
// result %1399, scale the f32 cast of %1377, cast to ONNX type 16 (bf16), and
// multiply by the [128] bf16 operand %19 (defined outside this chunk).
%1401 = torch.operator "onnx.Div"(%1400, %1399) : (!torch.vtensor<[],f32>, !torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1402 = torch.operator "onnx.Cast"(%1377) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32>
%1403 = torch.operator "onnx.Mul"(%1402, %1401) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,128],f32>
%1404 = torch.operator "onnx.Cast"(%1403) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,24,?,128],f32>) -> !torch.vtensor<[1,24,?,128],bf16>
%1405 = torch.operator "onnx.Mul"(%1404, %19) : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
// Three MatMul + Add pairs on %1361, using the [3072,3072] operands %744-%746
// and the [3072] operands %25, %23, %24 (all defined outside this chunk).
%1406 = torch.operator "onnx.MatMul"(%1361, %744) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%1407 = torch.operator "onnx.Add"(%25, %1406) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%1408 = torch.operator "onnx.MatMul"(%1361, %745) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%1409 = torch.operator "onnx.Add"(%23, %1408) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%1410 = torch.operator "onnx.MatMul"(%1361, %746) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%1411 = torch.operator "onnx.Add"(%24, %1410) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Reshape the first projection to [1,?,24,128] using shape constant %1371.
%1412 = torch.operator "onnx.Reshape"(%1407, %1371) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[1,?,24,128],bf16>
// The defining op of %1413 continues on the next line.
%1413 =
torch.operator "onnx.Transpose"(%1412) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[1,?,24,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
// (The line above is the defining op of %1413, whose "%1413 =" is on the previous line.)
//
// Reshape %1409 and %1411 to [1,?,24,128] (shape constants %1372/%1373) and
// swap axes 1 and 2 -> [1,24,?,128].
%1414 = torch.operator "onnx.Reshape"(%1409, %1372) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[1,?,24,128],bf16>
%1415 = torch.operator "onnx.Transpose"(%1414) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[1,?,24,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
%1416 = torch.operator "onnx.Reshape"(%1411, %1373) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[1,?,24,128],bf16>
%1417 = torch.operator "onnx.Transpose"(%1416) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[1,?,24,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
// First half of a normalization chain on %1413, computed in f32:
//   Cast -> Pow(x, c0) -> ReduceMean(last axis, keepdims) -> Add(c1) -> Sqrt.
// The rank-0 constants c0/c1 are typed tensor<f32>, matching their declared
// result type !torch.vtensor<[],f32>; a bare `tensor` is not a valid MLIR type.
// NOTE(review): constant values are in the dense_resource blob and not visible
// here - presumably exponent 2 and an epsilon; confirm.
%1418 = torch.operator "onnx.Cast"(%1413) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32>
%1419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1420 = torch.operator "onnx.Pow"(%1418, %1419) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,24,?,128],f32>
%1421 = torch.operator "onnx.ReduceMean"(%1420) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[1,24,?,128],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1422 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1423 = torch.operator "onnx.Add"(%1421, %1422) : (!torch.vtensor<[1,24,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1424 = torch.operator "onnx.Sqrt"(%1423) : (!torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,1],f32>
// Tail of the normalization chain on %1413: Div(constant, %1424), scale the f32
// cast of %1413, cast to ONNX type 16 (bf16), and multiply by the [128] bf16
// operand %28 (defined outside this chunk).
// The rank-0 constant is typed tensor<f32>, matching its declared result type
// !torch.vtensor<[],f32>; a bare `tensor` is not a valid MLIR type.
// NOTE(review): the constant's value is in the dense_resource blob and not
// visible here - presumably 1.0 (reciprocal of the Sqrt result); confirm.
%1425 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1426 = torch.operator "onnx.Div"(%1425, %1424) : (!torch.vtensor<[],f32>, !torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1427 = torch.operator "onnx.Cast"(%1413) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32>
%1428 = torch.operator "onnx.Mul"(%1427, %1426) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,128],f32>
%1429 = torch.operator "onnx.Cast"(%1428) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,24,?,128],f32>) -> !torch.vtensor<[1,24,?,128],bf16>
%1430 = torch.operator "onnx.Mul"(%1429, %28) : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
// First half of the same chain applied to %1415:
//   Cast -> Pow(x, c0) -> ReduceMean(last axis, keepdims) -> Add(c1) -> Sqrt.
// Both rank-0 constants are likewise typed tensor<f32>.
%1431 = torch.operator "onnx.Cast"(%1415) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32>
%1432 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1433 = torch.operator "onnx.Pow"(%1431, %1432) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,24,?,128],f32>
%1434 = torch.operator "onnx.ReduceMean"(%1433) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[1,24,?,128],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1435 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1436 = torch.operator "onnx.Add"(%1434, %1435) : (!torch.vtensor<[1,24,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1437 = torch.operator "onnx.Sqrt"(%1436) : (!torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,1],f32>
// Tail of the normalization chain on %1415: Div(constant, %1437), scale the f32
// cast of %1415, cast to ONNX type 16 (bf16), and multiply by the [128] bf16
// operand %29 (defined outside this chunk).
// The rank-0 constant is typed tensor<f32>, matching its declared result type
// !torch.vtensor<[],f32>; a bare `tensor` is not a valid MLIR type.
// NOTE(review): the constant's value is in the dense_resource blob and not
// visible here - presumably 1.0 (reciprocal of the Sqrt result); confirm.
%1438 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%1439 = torch.operator "onnx.Div"(%1438, %1437) : (!torch.vtensor<[],f32>, !torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,1],f32>
%1440 = torch.operator "onnx.Cast"(%1415) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32>
%1441 = torch.operator "onnx.Mul"(%1440, %1439) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[1,24,?,1],f32>) -> !torch.vtensor<[1,24,?,128],f32>
%1442 = torch.operator "onnx.Cast"(%1441) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,24,?,128],f32>) -> !torch.vtensor<[1,24,?,128],bf16>
%1443 = torch.operator "onnx.Mul"(%1442, %29) : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
// Concatenate along axis 2, in each case placing the tensor derived from %1361
// first and the tensor derived from %1323 second.
%1444 = torch.operator "onnx.Concat"(%1430, %1392) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
%1445 = torch.operator "onnx.Concat"(%1443, %1405) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
%1446 = torch.operator "onnx.Concat"(%1417, %1379) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],bf16>
// Add a leading unit axis to the [4608,128] f32 tensor %1281 (axes from %1447).
%1447 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1448 = torch.operator "onnx.Unsqueeze"(%1281, %1447) : (!torch.vtensor<[4608,128],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,4608,128],f32>
%1449 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The defining op of %1450 continues on the next line.
%1450 =
torch.operator "onnx.Unsqueeze"(%1448, %1449) : (!torch.vtensor<[1,4608,128],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,1,4608,128],f32> %1451 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1452 = torch.operator "onnx.Unsqueeze"(%1283, %1451) : (!torch.vtensor<[4608,128],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,4608,128],f32> %1453 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1454 = torch.operator "onnx.Unsqueeze"(%1452, %1453) : (!torch.vtensor<[1,4608,128],f32>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,1,4608,128],f32> %1455 = torch.operator "onnx.Cast"(%1450) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[1,1,4608,128],f32> %1456 = torch.operator "onnx.Cast"(%1454) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[1,1,4608,128],f32> %1457 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_10_attr__value> : tensor<5xsi64>} : () -> !torch.vtensor<[5],si64> %1458 = torch.operator "onnx.Reshape"(%1444, %1457) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[1,24,4608,?,2],bf16> %1459 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_11_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %1460:2 = torch.operator "onnx.Split"(%1458, %1459) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,24,4608,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[1,24,4608,?,1],bf16>, !torch.vtensor<[1,24,4608,?,1],bf16>) %1461 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.0_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1462 = torch.operator "onnx.Squeeze"(%1460#0, %1461) : (!torch.vtensor<[1,24,4608,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,24,4608,?],bf16> %1463 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1464 = torch.operator "onnx.Squeeze"(%1460#1, %1463) : (!torch.vtensor<[1,24,4608,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,24,4608,?],bf16> %1465 = torch.operator "onnx.Neg"(%1464) : (!torch.vtensor<[1,24,4608,?],bf16>) -> !torch.vtensor<[1,24,4608,?],bf16> %1466 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1467 = torch.operator "onnx.Unsqueeze"(%1465, %1466) : (!torch.vtensor<[1,24,4608,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,24,4608,?,1],bf16> %1468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1469 = torch.operator "onnx.Unsqueeze"(%1462, %1468) : (!torch.vtensor<[1,24,4608,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,24,4608,?,1],bf16> %1470 = torch.operator "onnx.Concat"(%1467, %1469) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,24,4608,?,1],bf16>, !torch.vtensor<[1,24,4608,?,1],bf16>) -> !torch.vtensor<[1,24,4608,?,2],bf16> %1471 = torch.operator "onnx.Shape"(%1470) : (!torch.vtensor<[1,24,4608,?,2],bf16>) -> !torch.vtensor<[5],si64> %1472 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1473 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.0_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1474 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1475 = torch.operator "onnx.Slice"(%1471, %1473, %1474, %1472) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %1476 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1477 = torch.operator "onnx.Concat"(%1475, %1476) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1478 = torch.operator "onnx.Reshape"(%1470, %1477) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[1,24,4608,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[1,24,4608,?],bf16> %1479 = torch.operator "onnx.Cast"(%1444) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,?,128],f32> %1480 = torch.operator "onnx.Mul"(%1479, %1455) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[1,24,4608,128],f32> %1481 = torch.operator "onnx.Cast"(%1478) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,4608,?],bf16>) -> !torch.vtensor<[1,24,4608,?],f32> %1482 = torch.operator "onnx.Mul"(%1481, %1456) : (!torch.vtensor<[1,24,4608,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[1,24,4608,128],f32> %1483 = torch.operator "onnx.Add"(%1480, %1482) : (!torch.vtensor<[1,24,4608,128],f32>, !torch.vtensor<[1,24,4608,128],f32>) -> !torch.vtensor<[1,24,4608,128],f32> %1484 = torch.operator "onnx.Cast"(%1483) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,24,4608,128],f32>) -> !torch.vtensor<[1,24,4608,128],bf16> %1485 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_20_attr__value> : tensor<5xsi64>} : () -> !torch.vtensor<[5],si64> %1486 = torch.operator "onnx.Reshape"(%1445, %1485) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[1,24,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[1,24,4608,?,2],bf16> %1487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_21_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %1488:2 = torch.operator "onnx.Split"(%1486, %1487) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,24,4608,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[1,24,4608,?,1],bf16>, !torch.vtensor<[1,24,4608,?,1],bf16>) %1489 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1490 = torch.operator "onnx.Squeeze"(%1488#0, %1489) : (!torch.vtensor<[1,24,4608,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,24,4608,?],bf16> %1491 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1492 = torch.operator "onnx.Squeeze"(%1488#1, %1491) : (!torch.vtensor<[1,24,4608,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,24,4608,?],bf16> %1493 = torch.operator "onnx.Neg"(%1492) : (!torch.vtensor<[1,24,4608,?],bf16>) -> !torch.vtensor<[1,24,4608,?],bf16> %1494 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1495 = torch.operator "onnx.Unsqueeze"(%1493, %1494) : (!torch.vtensor<[1,24,4608,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,24,4608,?,1],bf16> %1496 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.0_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1497 = torch.operator "onnx.Unsqueeze"(%1490, %1496) : (!torch.vtensor<[1,24,4608,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,24,4608,?,1],bf16> %1498 = torch.operator "onnx.Concat"(%1495, %1497) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,24,4608,?,1],bf16>, !torch.vtensor<[1,24,4608,?,1],bf16>) -> !torch.vtensor<[1,24,4608,?,2],bf16> %1499 = torch.operator "onnx.Shape"(%1498) : (!torch.vtensor<[1,24,4608,?,2],bf16>) -> !torch.vtensor<[5],si64> %1500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1501 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1502 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1503 = torch.operator "onnx.Slice"(%1499, %1501, %1502, %1500) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %1504 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1505 = torch.operator "onnx.Concat"(%1503, %1504) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1506 = torch.operator "onnx.Reshape"(%1498, %1505) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[1,24,4608,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[1,24,4608,?],bf16> %1507 = torch.operator "onnx.Cast"(%1445) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,?,128],bf16>) -> 
!torch.vtensor<[1,24,?,128],f32> %1508 = torch.operator "onnx.Mul"(%1507, %1455) : (!torch.vtensor<[1,24,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[1,24,4608,128],f32> %1509 = torch.operator "onnx.Cast"(%1506) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1,24,4608,?],bf16>) -> !torch.vtensor<[1,24,4608,?],f32> %1510 = torch.operator "onnx.Mul"(%1509, %1456) : (!torch.vtensor<[1,24,4608,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[1,24,4608,128],f32> %1511 = torch.operator "onnx.Add"(%1508, %1510) : (!torch.vtensor<[1,24,4608,128],f32>, !torch.vtensor<[1,24,4608,128],f32>) -> !torch.vtensor<[1,24,4608,128],f32> %1512 = torch.operator "onnx.Cast"(%1511) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,24,4608,128],f32>) -> !torch.vtensor<[1,24,4608,128],bf16> %1513 = torch.operator "onnx.Shape"(%1484) : (!torch.vtensor<[1,24,4608,128],bf16>) -> !torch.vtensor<[4],si64> %1514 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1515 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1516 = torch.operator "onnx.Slice"(%1513, %1514, %1515) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1517 = torch.operator "onnx.Cast"(%1516) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %1518 = torch.operator "onnx.Sqrt"(%1517) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %1519 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_32_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %1520 = torch.operator "onnx.Cast"(%1518) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %1521 = 
torch.operator "onnx.Div"(%1519, %1520) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %1522 = torch.operator "onnx.Cast"(%1521) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %1523 = torch.operator "onnx.Transpose"(%1512) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[1,24,4608,128],bf16>) -> !torch.vtensor<[1,24,128,4608],bf16> %1524 = torch.operator "onnx.Sqrt"(%1522) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %1525 = torch.operator "onnx.Mul"(%1484, %1524) : (!torch.vtensor<[1,24,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[1,24,4608,128],bf16> %1526 = torch.operator "onnx.Sqrt"(%1522) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %1527 = torch.operator "onnx.Mul"(%1523, %1526) : (!torch.vtensor<[1,24,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[1,24,128,4608],bf16> %1528 = torch.operator "onnx.MatMul"(%1525, %1527) : (!torch.vtensor<[1,24,4608,128],bf16>, !torch.vtensor<[1,24,128,4608],bf16>) -> !torch.vtensor<[1,24,4608,4608],bf16> %1529 = torch.operator "onnx.Softmax"(%1528) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[1,24,4608,4608],bf16>) -> !torch.vtensor<[1,24,4608,4608],bf16> %1530 = torch.operator "onnx.MatMul"(%1529, %1446) : (!torch.vtensor<[1,24,4608,4608],bf16>, !torch.vtensor<[1,24,?,128],bf16>) -> !torch.vtensor<[1,24,4608,128],bf16> %1531 = torch.operator "onnx.Transpose"(%1530) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[1,24,4608,128],bf16>) -> !torch.vtensor<[1,4608,24,128],bf16> %1532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_33_attr__value> : tensor<3xsi64>} : () -> !torch.vtensor<[3],si64> %1533 = torch.operator "onnx.Reshape"(%1531, %1532) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[1,4608,24,128],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[1,4608,3072],bf16> 
%1534 = torch.operator "onnx.Cast"(%1533) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1,4608,3072],bf16>) -> !torch.vtensor<[1,4608,3072],bf16> %1535 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1536 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1537 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1538 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1539 = torch.operator "onnx.Slice"(%1534, %1536, %1537, %1535, %1538) : (!torch.vtensor<[1,4608,3072],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,512,3072],bf16> %1540 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1541 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1542 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1543 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1544 = torch.operator "onnx.Slice"(%1534, %1541, %1542, %1540, %1543) : (!torch.vtensor<[1,4608,3072],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1,4096,3072],bf16> %1545 = torch.operator "onnx.MatMul"(%1544, %747) : (!torch.vtensor<[1,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[1,4096,3072],bf16> %1546 = torch.operator "onnx.Add"(%26, %1545) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[1,4096,3072],bf16>) -> !torch.vtensor<[1,4096,3072],bf16> %1547 = torch.operator "onnx.MatMul"(%1539, %748) : (!torch.vtensor<[1,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[1,512,3072],bf16> %1548 = torch.operator "onnx.Add"(%27, %1547) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[1,512,3072],bf16>) -> !torch.vtensor<[1,512,3072],bf16> %1549 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1550 = torch.operator "onnx.Unsqueeze"(%1303, %1549) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1551 = torch.operator "onnx.Mul"(%1550, %1546) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[1,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1552 = torch.operator "onnx.Add"(%1161, %1551) : (!torch.vtensor<[1,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.02Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.0/norm2/Constant_attr__value" : tensor<3072xbf16> %1553 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.02Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.02Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.0/norm2/Constant_1_attr__value" : tensor<3072xbf16> %1554 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.02Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %1555 = torch.operator "onnx.LayerNormalization"(%1552, %1553, %1554) 
{torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Modulate the LayerNormalization result %1555:
//   %1560 = %1555 * (unsqueeze(%1309) + %1558)
//   %1563 = %1560 + unsqueeze(%1306)
// Both unsqueezes turn [?,?] into [?,1,?]. The scalar held by %1558 is stored in a
// dense_resource blob that is not visible here (presumably 1.0 -- confirm).
%1556 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1557 = torch.operator "onnx.Unsqueeze"(%1309, %1556) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 bf16 constant: the attribute type is spelled tensor<bf16> so it agrees with the
// !torch.vtensor<[],bf16> result (a bare `tensor` is not a valid builtin type).
%1558 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1559 = torch.operator "onnx.Add"(%1557, %1558) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%1560 = torch.operator "onnx.Mul"(%1555, %1559) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%1561 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1562 = torch.operator "onnx.Unsqueeze"(%1306, %1561) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%1563 = torch.operator "onnx.Add"(%1560, %1562) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Project 3072 -> 12288 with %749 and add the [12288] vector %30.
%1564 = torch.operator "onnx.MatMul"(%1563, %749) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1565 = torch.operator "onnx.Add"(%30, %1564) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// %1566 = %1565^2 and %1567 = %1565^3, both elementwise.
%1566 = torch.operator "onnx.Mul"(%1565, %1565) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1567 = torch.operator "onnx.Mul"(%1565, %1566) :
(!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Elementwise activation on x = %1565, with x^3 = %1567:
//   %1578 = c3 * ( x * ( c2 + tanh( c1 * ( x + c0 * x^3 ) ) ) )
// where c0..c3 are the rank-0 bf16 constants %1568, %1571, %1574, %1577.
// NOTE(review): this has the shape of the tanh approximation of GELU; the constant values are
// stored in dense_resource blobs that are not visible here -- confirm before relying on that.
// The four constants are typed tensor<bf16> to agree with their !torch.vtensor<[],bf16>
// results (a bare `tensor` is not a valid builtin type).
%1568 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1569 = torch.operator "onnx.Mul"(%1568, %1567) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1570 = torch.operator "onnx.Add"(%1565, %1569) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1571 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1572 = torch.operator "onnx.Mul"(%1571, %1570) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1573 = torch.operator "onnx.Tanh"(%1572) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1574 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1575 = torch.operator "onnx.Add"(%1574, %1573) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1576 = torch.operator "onnx.Mul"(%1565, %1575) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1577 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1578 = torch.operator "onnx.Mul"(%1577, %1576) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Project 12288 -> 3072 with %750.
%1579 = torch.operator "onnx.MatMul"(%1578, %750) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) ->
!torch.vtensor<[?,4096,3072],bf16> %1580 = torch.operator "onnx.Add"(%31, %1579) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1581 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1582 = torch.operator "onnx.Unsqueeze"(%1312, %1581) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1583 = torch.operator "onnx.Mul"(%1582, %1580) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1584 = torch.operator "onnx.Add"(%1552, %1583) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1585 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1586 = torch.operator "onnx.Unsqueeze"(%1341, %1585) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1587 = torch.operator "onnx.Mul"(%1586, %1548) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[1,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1588 = torch.operator "onnx.Add"(%1225, %1587) : (!torch.vtensor<[1,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.02Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.0/norm2_context/Constant_attr__value" : tensor<3072xbf16> %1589 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.02Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.02Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.0/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %1590 = torch_c.from_builtin_tensor 
%_2Ftransformer_blocks.02Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// LayerNormalization of %1588 over the last axis, with scale %1589 and bias %1590.
%1591 = torch.operator "onnx.LayerNormalization"(%1588, %1589, %1590) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Modulate the normalized value:
//   %1596 = %1591 * (unsqueeze(%1347) + %1594)
//   %1599 = %1596 + unsqueeze(%1344)
// Both unsqueezes turn [?,?] into [?,1,?]. The scalar held by %1594 is stored in a
// dense_resource blob that is not visible here (presumably 1.0 -- confirm).
%1592 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1593 = torch.operator "onnx.Unsqueeze"(%1347, %1592) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 bf16 constant: the attribute type is spelled tensor<bf16> so it agrees with the
// !torch.vtensor<[],bf16> result (a bare `tensor` is not a valid builtin type).
%1594 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1595 = torch.operator "onnx.Add"(%1593, %1594) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%1596 = torch.operator "onnx.Mul"(%1591, %1595) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%1597 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1598 = torch.operator "onnx.Unsqueeze"(%1344, %1597) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%1599 = torch.operator "onnx.Add"(%1596, %1598) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Project 3072 -> 12288 with %751 and add the [12288] vector %32.
%1600 = torch.operator "onnx.MatMul"(%1599, %751) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%1601 = torch.operator "onnx.Add"(%32, %1600) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// %1602 = %1601^2, elementwise.
%1602 = torch.operator "onnx.Mul"(%1601, %1601) :
(!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// Elementwise activation on x = %1601, with x^2 = %1602 and x^3 = %1603:
//   %1614 = c3 * ( x * ( c2 + tanh( c1 * ( x + c0 * x^3 ) ) ) )
// where c0..c3 are the rank-0 bf16 constants %1604, %1607, %1610, %1613.
// NOTE(review): this has the shape of the tanh approximation of GELU; the constant values are
// stored in dense_resource blobs that are not visible here -- confirm before relying on that.
// The four constants are typed tensor<bf16> to agree with their !torch.vtensor<[],bf16>
// results (a bare `tensor` is not a valid builtin type).
%1603 = torch.operator "onnx.Mul"(%1601, %1602) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%1604 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1605 = torch.operator "onnx.Mul"(%1604, %1603) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%1606 = torch.operator "onnx.Add"(%1601, %1605) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%1607 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1608 = torch.operator "onnx.Mul"(%1607, %1606) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%1609 = torch.operator "onnx.Tanh"(%1608) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%1610 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1611 = torch.operator "onnx.Add"(%1610, %1609) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%1612 = torch.operator "onnx.Mul"(%1601, %1611) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%1613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1614 = torch.operator "onnx.Mul"(%1613, %1612) : (!torch.vtensor<[],bf16>,
!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %1615 = torch.operator "onnx.MatMul"(%1614, %752) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1616 = torch.operator "onnx.Add"(%33, %1615) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1617 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.0_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1618 = torch.operator "onnx.Unsqueeze"(%1350, %1617) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1619 = torch.operator "onnx.Mul"(%1618, %1616) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1620 = torch.operator "onnx.Add"(%1588, %1619) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1621 = torch.operator "onnx.Gemm"(%1285, %34, %35) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %1622 = torch.operator "onnx.Shape"(%1621) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %1623 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1624 = torch.operator "onnx.Gather"(%1622, %1623) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1625 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1626 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.1_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1627 = torch.operator "onnx.Add"(%1624, %1626) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1628 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1629 = torch.operator "onnx.Div"(%1627, %1628) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1630 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1631 = torch.operator "onnx.Mul"(%1629, %1630) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1632 = torch.operator "onnx.Slice"(%1621, %1625, %1631, %1623) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1633 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1634 = torch.operator "onnx.Mul"(%1629, %1633) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1635 = torch.operator "onnx.Slice"(%1621, %1631, %1634, %1623) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1636 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1637 = torch.operator "onnx.Mul"(%1629, %1636) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1638 = torch.operator "onnx.Slice"(%1621, %1634, %1637, %1623) : 
(!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1639 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1640 = torch.operator "onnx.Mul"(%1629, %1639) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1641 = torch.operator "onnx.Slice"(%1621, %1637, %1640, %1623) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1642 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1643 = torch.operator "onnx.Mul"(%1629, %1642) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1644 = torch.operator "onnx.Slice"(%1621, %1640, %1643, %1623) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1645 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1646 = torch.operator "onnx.Mul"(%1629, %1645) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1647 = torch.operator "onnx.Slice"(%1621, %1643, %1646, %1623) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.12Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.1/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %1648 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.12Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
// Load the second LayerNormalization parameter from its global and bring it into the torch dialect.
%_2Ftransformer_blocks.12Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.1/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
%1649 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.12Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// LayerNormalization of %1584 over the last axis, with scale %1648 and bias %1649.
%1650 = torch.operator "onnx.LayerNormalization"(%1584, %1648, %1649) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Modulate the normalized value with two slices of the Gemm result %1621:
//   %1655 = %1650 * (unsqueeze(%1635) + %1653)
//   %1658 = %1655 + unsqueeze(%1632)
// Both unsqueezes turn [?,?] into [?,1,?]. The scalar held by %1653 is stored in a
// dense_resource blob that is not visible here (presumably 1.0 -- confirm).
%1651 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1652 = torch.operator "onnx.Unsqueeze"(%1635, %1651) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 bf16 constant: the attribute type is spelled tensor<bf16> so it agrees with the
// !torch.vtensor<[],bf16> result (a bare `tensor` is not a valid builtin type).
%1653 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1654 = torch.operator "onnx.Add"(%1652, %1653) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%1655 = torch.operator "onnx.Mul"(%1650, %1654) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%1656 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1657 = torch.operator "onnx.Unsqueeze"(%1632, %1656) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%1658 = torch.operator "onnx.Add"(%1655, %1657) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Gemm of %1285 with %36 (transposed, transB = 1) plus %37, producing [1,18432].
%1659 = torch.operator "onnx.Gemm"(%1285, %36, %37) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32,
torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %1660 = torch.operator "onnx.Shape"(%1659) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %1661 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1662 = torch.operator "onnx.Gather"(%1660, %1661) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1663 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1664 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1665 = torch.operator "onnx.Add"(%1662, %1664) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1667 = torch.operator "onnx.Div"(%1665, %1666) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1668 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1669 = torch.operator "onnx.Mul"(%1667, %1668) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1670 = torch.operator "onnx.Slice"(%1659, %1663, %1669, %1661) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1671 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1672 = torch.operator "onnx.Mul"(%1667, %1671) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1673 = torch.operator "onnx.Slice"(%1659, %1669, %1672, %1661) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1674 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1675 = torch.operator "onnx.Mul"(%1667, %1674) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1676 = torch.operator "onnx.Slice"(%1659, %1672, %1675, %1661) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1677 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1678 = torch.operator "onnx.Mul"(%1667, %1677) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1679 = torch.operator "onnx.Slice"(%1659, %1675, %1678, %1661) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1680 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1681 = torch.operator "onnx.Mul"(%1667, %1680) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1682 = torch.operator "onnx.Slice"(%1659, %1678, %1681, %1661) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %1683 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1684 = torch.operator "onnx.Mul"(%1667, %1683) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1685 = torch.operator "onnx.Slice"(%1659, %1681, %1684, %1661) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.12Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.1/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %1686 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.12Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.12Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.1/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %1687 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.12Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %1688 = torch.operator "onnx.LayerNormalization"(%1620, %1686, %1687) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1689 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1690 = torch.operator "onnx.Unsqueeze"(%1673, %1689) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1691 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.1_norm1_context_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %1692 = torch.operator "onnx.Add"(%1690, %1691) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %1693 = torch.operator "onnx.Mul"(%1688, %1692) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1694 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1695 = torch.operator "onnx.Unsqueeze"(%1670, %1694) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1696 = torch.operator "onnx.Add"(%1693, %1695) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1697 = torch.operator "onnx.Shape"(%1696) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %1698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1699 = torch.operator "onnx.Gather"(%1697, %1698) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %1700 = torch.operator "onnx.MatMul"(%1658, %753) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1701 = torch.operator "onnx.Add"(%40, %1700) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1702 = torch.operator "onnx.MatMul"(%1658, %754) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1703 = torch.operator "onnx.Add"(%41, %1702) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1704 = torch.operator 
"onnx.MatMul"(%1658, %755) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1705 = torch.operator "onnx.Add"(%42, %1704) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1706 = torch.operator "onnx.Shape"(%1703) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64> %1707 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1708 = torch.operator "onnx.Gather"(%1706, %1707) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %1709 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1710 = torch.operator "onnx.Div"(%1708, %1709) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %1711 = torch.operator "onnx.Cast"(%1710) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %1712 = torch.operator "onnx.Cast"(%1711) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %1713 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6255_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1714 = torch.operator "onnx.Unsqueeze"(%1699, %1713) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1715 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1716 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1717 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_6259_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1718 = torch.operator "onnx.Unsqueeze"(%1712, %1717) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1719 = torch.operator "onnx.Concat"(%1714, %1715, %1716, %1718) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1720 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6262_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1721 = torch.operator "onnx.Unsqueeze"(%1699, %1720) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1722 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1723 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1724 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6266_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1725 = torch.operator "onnx.Unsqueeze"(%1712, %1724) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1726 = torch.operator "onnx.Concat"(%1721, %1722, %1723, %1725) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1727 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6269_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1728 = torch.operator "onnx.Unsqueeze"(%1699, %1727) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1729 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.1_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1730 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1731 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6273_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1732 = torch.operator "onnx.Unsqueeze"(%1712, %1731) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1733 = torch.operator "onnx.Concat"(%1728, %1729, %1730, %1732) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1734 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6276_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1735 = torch.operator "onnx.Unsqueeze"(%1699, %1734) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1736 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1737 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1738 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6280_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1739 = torch.operator "onnx.Unsqueeze"(%1712, %1738) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1740 = torch.operator "onnx.Concat"(%1735, %1736, %1737, %1739) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1741 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6283_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1742 = torch.operator "onnx.Unsqueeze"(%1699, %1741) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1743 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1744 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1745 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6287_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1746 = torch.operator "onnx.Unsqueeze"(%1712, %1745) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1747 = torch.operator "onnx.Concat"(%1742, %1743, %1744, %1746) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1748 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6290_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1749 = torch.operator "onnx.Unsqueeze"(%1699, %1748) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1750 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1751 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1752 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6294_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1753 = torch.operator 
"onnx.Unsqueeze"(%1712, %1752) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1754 = torch.operator "onnx.Concat"(%1749, %1750, %1751, %1753) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1755 = torch.operator "onnx.Reshape"(%1701, %1719) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1756 = torch.operator "onnx.Transpose"(%1755) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %1757 = torch.operator "onnx.Reshape"(%1703, %1726) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1758 = torch.operator "onnx.Transpose"(%1757) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %1759 = torch.operator "onnx.Reshape"(%1705, %1733) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1760 = torch.operator "onnx.Transpose"(%1759) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %1761 = torch.operator "onnx.Cast"(%1756) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %1762 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1763 = torch.operator "onnx.Pow"(%1761, %1762) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %1764 = torch.operator "onnx.ReduceMean"(%1763) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : 
(!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1765 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1766 = torch.operator "onnx.Add"(%1764, %1765) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1767 = torch.operator "onnx.Sqrt"(%1766) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1768 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1769 = torch.operator "onnx.Div"(%1768, %1767) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1770 = torch.operator "onnx.Cast"(%1756) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %1771 = torch.operator "onnx.Mul"(%1770, %1769) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %1772 = torch.operator "onnx.Cast"(%1771) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %1773 = torch.operator "onnx.Mul"(%1772, %38) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %1774 = torch.operator "onnx.Cast"(%1758) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %1775 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1776 = torch.operator "onnx.Pow"(%1774, %1775) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %1777 = torch.operator "onnx.ReduceMean"(%1776) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> 
%1778 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1779 = torch.operator "onnx.Add"(%1777, %1778) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1780 = torch.operator "onnx.Sqrt"(%1779) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1781 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1782 = torch.operator "onnx.Div"(%1781, %1780) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1783 = torch.operator "onnx.Cast"(%1758) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %1784 = torch.operator "onnx.Mul"(%1783, %1782) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %1785 = torch.operator "onnx.Cast"(%1784) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %1786 = torch.operator "onnx.Mul"(%1785, %39) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %1787 = torch.operator "onnx.MatMul"(%1696, %756) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1788 = torch.operator "onnx.Add"(%45, %1787) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1789 = torch.operator "onnx.MatMul"(%1696, %757) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1790 = torch.operator "onnx.Add"(%43, %1789) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1791 = torch.operator "onnx.MatMul"(%1696, %758) : 
(!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1792 = torch.operator "onnx.Add"(%44, %1791) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %1793 = torch.operator "onnx.Reshape"(%1788, %1740) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1794 = torch.operator "onnx.Transpose"(%1793) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %1795 = torch.operator "onnx.Reshape"(%1790, %1747) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1796 = torch.operator "onnx.Transpose"(%1795) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %1797 = torch.operator "onnx.Reshape"(%1792, %1754) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1798 = torch.operator "onnx.Transpose"(%1797) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %1799 = torch.operator "onnx.Cast"(%1794) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %1800 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_added_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1801 = torch.operator "onnx.Pow"(%1799, %1800) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %1802 = torch.operator "onnx.ReduceMean"(%1801) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1803 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_added_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1804 = torch.operator "onnx.Add"(%1802, %1803) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1805 = torch.operator "onnx.Sqrt"(%1804) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1806 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_added_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1807 = torch.operator "onnx.Div"(%1806, %1805) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1808 = torch.operator "onnx.Cast"(%1794) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %1809 = torch.operator "onnx.Mul"(%1808, %1807) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %1810 = torch.operator "onnx.Cast"(%1809) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %1811 = torch.operator "onnx.Mul"(%1810, %48) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %1812 = torch.operator "onnx.Cast"(%1796) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %1813 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_added_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1814 = torch.operator "onnx.Pow"(%1812, %1813) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %1815 = torch.operator "onnx.ReduceMean"(%1814) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1816 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.1_attn_norm_added_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1817 = torch.operator "onnx.Add"(%1815, %1816) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1818 = torch.operator "onnx.Sqrt"(%1817) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1819 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_norm_added_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %1820 = torch.operator "onnx.Div"(%1819, %1818) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %1821 = torch.operator "onnx.Cast"(%1796) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %1822 = torch.operator "onnx.Mul"(%1821, %1820) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %1823 = torch.operator "onnx.Cast"(%1822) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %1824 = torch.operator "onnx.Mul"(%1823, %49) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %1825 = torch.operator "onnx.Concat"(%1811, %1773) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %1826 = torch.operator "onnx.Concat"(%1824, %1786) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %1827 = torch.operator "onnx.Concat"(%1798, %1760) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %1828 = torch.operator "onnx.Shape"(%1825) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %1829 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.1_attn_Constant_15_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1830 = torch.operator "onnx.Gather"(%1828, %1829) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %1831 = torch.operator "onnx.Shape"(%1825) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %1832 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_16_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1833 = torch.operator "onnx.Gather"(%1831, %1832) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %1834 = torch.operator "onnx.Shape"(%1825) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %1835 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_17_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %1836 = torch.operator "onnx.Gather"(%1834, %1835) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %1837 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6379_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1838 = torch.operator "onnx.Unsqueeze"(%1830, %1837) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1839 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6381_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1840 = torch.operator "onnx.Unsqueeze"(%1833, %1839) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1841 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6383_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1842 = torch.operator "onnx.Unsqueeze"(%1836, %1841) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %1843 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1844 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1845 = torch.operator "onnx.Concat"(%1838, %1840, %1842, %1843, %1844) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %1846 = torch.operator "onnx.Reshape"(%1825, %1845) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %1847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %1848:2 = torch.operator "onnx.Split"(%1846, %1847) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %1849 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1850 = torch.operator "onnx.Squeeze"(%1848#0, %1849) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1851 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1852 = torch.operator "onnx.Squeeze"(%1848#1, %1851) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1853 = torch.operator "onnx.Neg"(%1852) : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],bf16> %1854 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1855 = torch.operator "onnx.Unsqueeze"(%1853, %1854) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %1856 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1857 = torch.operator "onnx.Unsqueeze"(%1850, %1856) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %1858 = torch.operator "onnx.Concat"(%1855, %1857) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %1859 = torch.operator "onnx.Shape"(%1858) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %1860 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1861 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1862 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1863 = torch.operator "onnx.Slice"(%1859, %1861, %1862, %1860) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %1864 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1865 = torch.operator "onnx.Concat"(%1863, %1864) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// ---- transformer_blocks.1 attention, ops %1866..%1891 ----------------------
// The line above is the type signature that completes %1865 (started earlier).
//
// %1866        : reshape the rank-5 concat %1858 back to rank 4 using shape %1865.
// %1867..%1872 : cast %1825 and %1866 to f32, multiply them by %1455 and %1456
//                respectively, add the products, and cast the sum to bf16.
//                NOTE(review): this looks like a rotary-embedding application
//                with %1455/%1456 as the cos/sin tables - confirm against
//                their definitions, which are outside this span.
%1866 = torch.operator "onnx.Reshape"(%1858, %1865) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%1867 = torch.operator "onnx.Cast"(%1825) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%1868 = torch.operator "onnx.Mul"(%1867, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%1869 = torch.operator "onnx.Cast"(%1866) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%1870 = torch.operator "onnx.Mul"(%1869, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%1871 = torch.operator "onnx.Add"(%1868, %1870) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%1872 = torch.operator "onnx.Cast"(%1871) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// %1873..%1881 : gather three entries of the shape of %1826 using scalar index
//                constants stored in dense resources.
//                FIX: the value attribute of each rank-0 constant was typed as
//                a bare `tensor`, which is not valid builtin tensor syntax.
//                The element type is restored to `tensor<si64>` to match the
//                op's declared `!torch.vtensor<[],si64>` result.
%1873 = torch.operator "onnx.Shape"(%1826) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%1874 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%1875 = torch.operator "onnx.Gather"(%1873, %1874) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%1876 = torch.operator "onnx.Shape"(%1826) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%1877 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%1878 = torch.operator "onnx.Gather"(%1876, %1877) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%1879 = torch.operator "onnx.Shape"(%1826) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%1880 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%1881 = torch.operator "onnx.Gather"(%1879, %1880) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// %1882..%1890 : unsqueeze the three gathered dims to 1-element vectors and
//                concatenate them with two constant entries into a 5-element
//                shape vector. %1891 (continued after this span) reshapes
//                %1826 with it.
%1882 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6424_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1883 = torch.operator "onnx.Unsqueeze"(%1875, %1882) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%1884 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6426_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1885 = torch.operator "onnx.Unsqueeze"(%1878, %1884) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%1886 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6428_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1887 = torch.operator "onnx.Unsqueeze"(%1881, %1886) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%1888 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1889 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1890 = torch.operator "onnx.Concat"(%1883, %1885, %1887, %1888, %1889) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>
%1891 =
torch.operator "onnx.Reshape"(%1826, %1890) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %1892 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %1893:2 = torch.operator "onnx.Split"(%1891, %1892) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %1894 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1895 = torch.operator "onnx.Squeeze"(%1893#0, %1894) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1896 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1897 = torch.operator "onnx.Squeeze"(%1893#1, %1896) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1898 = torch.operator "onnx.Neg"(%1897) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %1899 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1900 = torch.operator "onnx.Unsqueeze"(%1898, %1899) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %1901 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1902 = torch.operator "onnx.Unsqueeze"(%1895, %1901) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?,1],bf16> %1903 = torch.operator "onnx.Concat"(%1900, %1902) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %1904 = torch.operator "onnx.Shape"(%1903) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %1905 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1906 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1907 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1908 = torch.operator "onnx.Slice"(%1904, %1906, %1907, %1905) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %1909 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1910 = torch.operator "onnx.Concat"(%1908, %1909) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %1911 = torch.operator "onnx.Reshape"(%1903, %1910) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %1912 = torch.operator "onnx.Cast"(%1826) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %1913 = torch.operator "onnx.Mul"(%1912, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %1914 = torch.operator "onnx.Cast"(%1911) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],f32> %1915 = torch.operator "onnx.Mul"(%1914, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %1916 = torch.operator "onnx.Add"(%1913, %1915) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %1917 = torch.operator "onnx.Cast"(%1916) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %1918 = torch.operator "onnx.Shape"(%1872) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %1919 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1920 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1921 = torch.operator "onnx.Slice"(%1918, %1919, %1920) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1922 = torch.operator "onnx.Cast"(%1921) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %1923 = torch.operator "onnx.Sqrt"(%1922) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %1924 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %1925 = torch.operator "onnx.Cast"(%1923) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %1926 = torch.operator "onnx.Div"(%1924, %1925) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %1927 = torch.operator "onnx.Cast"(%1926) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %1928 = torch.operator "onnx.Transpose"(%1917) {torch.onnx.perm = [0 : si64, 1 : 
si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// ---- transformer_blocks.1 attention core, ops %1929..%1954 -----------------
// The line above completes the Transpose %1928 (started earlier).
//
// %1929..%1933 : take sqrt of the scale %1927 twice, multiply it into %1872
//                and into the transposed tensor %1928, then MatMul the two
//                scaled operands.
// %1934..%1936 : softmax over the last axis, MatMul with %1827, then transpose
//                with perm [0, 2, 1, 3].
%1929 = torch.operator "onnx.Sqrt"(%1927) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%1930 = torch.operator "onnx.Mul"(%1872, %1929) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%1931 = torch.operator "onnx.Sqrt"(%1927) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%1932 = torch.operator "onnx.Mul"(%1928, %1931) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
%1933 = torch.operator "onnx.MatMul"(%1930, %1932) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%1934 = torch.operator "onnx.Softmax"(%1933) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%1935 = torch.operator "onnx.MatMul"(%1934, %1827) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
%1936 = torch.operator "onnx.Transpose"(%1935) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// %1937..%1945 : build the 3-element shape [%1699, <const>, %1712 * <const>]
//                and reshape the transposed result to rank 3.
//                FIX: the rank-0 constant %1937 had its value attribute typed
//                as a bare `tensor`; restored to `tensor<si64>` to match its
//                declared `!torch.vtensor<[],si64>` result.
%1937 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_46_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%1938 = torch.operator "onnx.Mul"(%1712, %1937) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%1939 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6481_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1940 = torch.operator "onnx.Unsqueeze"(%1699, %1939) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%1941 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1942 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6484_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1943 = torch.operator "onnx.Unsqueeze"(%1938, %1942) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%1944 = torch.operator "onnx.Concat"(%1940, %1941, %1943) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%1945 = torch.operator "onnx.Reshape"(%1936, %1944) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
// %1946        : bf16 -> bf16 cast emitted by the exporter; kept because later
//                ops reference %1946 by name.
%1946 = torch.operator "onnx.Cast"(%1945) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
// %1947..%1953 : read one entry of the shape of %1696 (index held in a dense
//                resource) and prepare operands for the Slice ops that follow
//                this span.
//                FIX: same bare-`tensor` repair on the rank-0 constant %1948.
%1947 = torch.operator "onnx.Shape"(%1696) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%1948 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_48_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%1949 = torch.operator "onnx.Gather"(%1947, %1948) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%1950 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1951 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1952 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1953 = torch.operator "onnx.Unsqueeze"(%1949, %1952) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%1954 = torch.operator
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1955 = torch.operator "onnx.Slice"(%1946, %1951, %1953, %1950, %1954) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %1956 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1957 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1958 = torch.operator "onnx.Unsqueeze"(%1949, %1957) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %1959 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1960 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1961 = torch.operator "onnx.Slice"(%1946, %1958, %1959, %1956, %1960) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %1962 = torch.operator "onnx.MatMul"(%1961, %759) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %1963 = torch.operator "onnx.Add"(%46, %1962) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %1964 = torch.operator "onnx.MatMul"(%1955, %760) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %1965 = torch.operator "onnx.Add"(%47, %1964) : 
(!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %1966 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1967 = torch.operator "onnx.Unsqueeze"(%1638, %1966) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1968 = torch.operator "onnx.Mul"(%1967, %1963) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %1969 = torch.operator "onnx.Add"(%1584, %1968) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.12Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.1/norm2/Constant_attr__value" : tensor<3072xbf16> %1970 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.12Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.12Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.1/norm2/Constant_1_attr__value" : tensor<3072xbf16> %1971 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.12Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %1972 = torch.operator "onnx.LayerNormalization"(%1969, %1970, %1971) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %1973 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %1974 = torch.operator "onnx.Unsqueeze"(%1644, %1973) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %1975 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.1_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
// ---- transformer_blocks.1 feed-forward, ops %1975..%2000 -------------------
// The line above completes the rank-0 constant %1975 (started earlier).
// FIX (applies to %1975, %1985, %1988, %1991, %1994): the value attribute of
// each rank-0 constant was typed as a bare `tensor`, which is not valid
// builtin tensor syntax. The element type is restored to `tensor<bf16>` to
// match each op's declared `!torch.vtensor<[],bf16>` result.
//
// %1976..%1980 : modulate the LayerNorm output %1972: multiply by
//                (%1974 + %1975), then add the unsqueezed %1641.
%1976 = torch.operator "onnx.Add"(%1974, %1975) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%1977 = torch.operator "onnx.Mul"(%1972, %1976) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%1978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1979 = torch.operator "onnx.Unsqueeze"(%1641, %1978) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%1980 = torch.operator "onnx.Add"(%1977, %1979) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// %1981..%1982 : first projection, 3072 -> 12288 (MatMul with %761, bias %50).
%1981 = torch.operator "onnx.MatMul"(%1980, %761) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1982 = torch.operator "onnx.Add"(%50, %1981) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// %1983..%1995 : with x = %1982, compute c3 * x * (c2 + tanh(c1 * (x + c0 * x^3)))
//                where c0..c3 are %1985, %1988, %1991, %1994.
//                NOTE(review): this has the form of the tanh approximation of
//                GELU; the constant values live in dense resources that are
//                not visible here - confirm.
%1983 = torch.operator "onnx.Mul"(%1982, %1982) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1984 = torch.operator "onnx.Mul"(%1982, %1983) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1986 = torch.operator "onnx.Mul"(%1985, %1984) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1987 = torch.operator "onnx.Add"(%1982, %1986) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1988 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1989 = torch.operator "onnx.Mul"(%1988, %1987) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1990 = torch.operator "onnx.Tanh"(%1989) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1991 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1992 = torch.operator "onnx.Add"(%1991, %1990) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1993 = torch.operator "onnx.Mul"(%1982, %1992) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%1994 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%1995 = torch.operator "onnx.Mul"(%1994, %1993) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// %1996..%1997 : second projection, 12288 -> 3072 (MatMul with %762, bias %51).
%1996 = torch.operator "onnx.MatMul"(%1995, %762) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%1997 = torch.operator "onnx.Add"(%51, %1996) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// %1998..%2000 : multiply the projection by the unsqueezed %1647; the type
//                signature of %2000 continues after this span.
%1998 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%1999 = torch.operator "onnx.Unsqueeze"(%1647, %1998) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2000 = torch.operator "onnx.Mul"(%1999, %1997) :
(!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// ---- transformer_blocks.1 residuals + context feed-forward, %2001..%2035 ---
// The line above is the type signature that completes %2000 (started earlier).
// FIX (applies to %2011, %2021, %2024, %2027, %2030): the value attribute of
// each rank-0 constant was typed as a bare `tensor`, which is not valid
// builtin tensor syntax. The element type is restored to `tensor<bf16>` to
// match each op's declared `!torch.vtensor<[],bf16>` result.
//
// %2001        : residual add of %1969 and the gated feed-forward output %2000.
// %2002..%2005 : multiply %1965 by the unsqueezed %1676 and add the result to
//                %1620 (the [?,512,3072] stream).
%2001 = torch.operator "onnx.Add"(%1969, %2000) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2002 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2003 = torch.operator "onnx.Unsqueeze"(%1676, %2002) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2004 = torch.operator "onnx.Mul"(%2003, %1965) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%2005 = torch.operator "onnx.Add"(%1620, %2004) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// %2006..%2008 : load the two norm2_context constant vectors from globals and
//                apply LayerNormalization over the last axis of %2005.
%_2Ftransformer_blocks.12Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.1/norm2_context/Constant_attr__value" : tensor<3072xbf16>
%2006 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.12Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.12Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.1/norm2_context/Constant_1_attr__value" : tensor<3072xbf16>
%2007 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.12Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%2008 = torch.operator "onnx.LayerNormalization"(%2005, %2006, %2007) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// %2009..%2016 : modulate the normalized tensor: multiply by (%2010 + %2011),
//                then add the unsqueezed %1679.
%2009 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2010 = torch.operator "onnx.Unsqueeze"(%1682, %2009) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2011 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2012 = torch.operator "onnx.Add"(%2010, %2011) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%2013 = torch.operator "onnx.Mul"(%2008, %2012) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2014 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2015 = torch.operator "onnx.Unsqueeze"(%1679, %2014) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2016 = torch.operator "onnx.Add"(%2013, %2015) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// %2017..%2018 : first projection, 3072 -> 12288 (MatMul with %763, bias %52).
%2017 = torch.operator "onnx.MatMul"(%2016, %763) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2018 = torch.operator "onnx.Add"(%52, %2017) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// %2019..%2031 : with x = %2018, compute c3 * x * (c2 + tanh(c1 * (x + c0 * x^3)))
//                where c0..c3 are %2021, %2024, %2027, %2030.
//                NOTE(review): this has the form of the tanh approximation of
//                GELU; the constant values live in dense resources that are
//                not visible here - confirm.
%2019 = torch.operator "onnx.Mul"(%2018, %2018) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2020 = torch.operator "onnx.Mul"(%2018, %2019) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2021 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2022 = torch.operator "onnx.Mul"(%2021, %2020) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2023 = torch.operator "onnx.Add"(%2018, %2022) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2024 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2025 = torch.operator "onnx.Mul"(%2024, %2023) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2026 = torch.operator "onnx.Tanh"(%2025) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2027 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2028 = torch.operator "onnx.Add"(%2027, %2026) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2029 = torch.operator "onnx.Mul"(%2018, %2028) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2030 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2031 = torch.operator "onnx.Mul"(%2030, %2029) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// %2032..%2033 : second projection, 12288 -> 3072 (MatMul with %764, bias %53).
%2032 = torch.operator "onnx.MatMul"(%2031, %764) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2033 = torch.operator "onnx.Add"(%53, %2032) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// %2034        : axes constant for the Unsqueeze %2035, which continues after
//                this span.
%2034 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2035 = torch.operator
"onnx.Unsqueeze"(%1685, %2034) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2036 = torch.operator "onnx.Mul"(%2035, %2033) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2037 = torch.operator "onnx.Add"(%2005, %2036) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2038 = torch.operator "onnx.Gemm"(%1285, %54, %55) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %2039 = torch.operator "onnx.Shape"(%2038) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %2040 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2041 = torch.operator "onnx.Gather"(%2039, %2040) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2042 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2043 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2044 = torch.operator "onnx.Add"(%2041, %2043) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2045 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2046 = torch.operator "onnx.Div"(%2044, %2045) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2047 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2048 = torch.operator "onnx.Mul"(%2046, %2047) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2049 = torch.operator "onnx.Slice"(%2038, %2042, %2048, %2040) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2050 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2051 = torch.operator "onnx.Mul"(%2046, %2050) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2052 = torch.operator "onnx.Slice"(%2038, %2048, %2051, %2040) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2053 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2054 = torch.operator "onnx.Mul"(%2046, %2053) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2055 = torch.operator "onnx.Slice"(%2038, %2051, %2054, %2040) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2056 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2057 = torch.operator "onnx.Mul"(%2046, %2056) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2058 = torch.operator "onnx.Slice"(%2038, %2054, %2057, %2040) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2059 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2060 = torch.operator "onnx.Mul"(%2046, %2059) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2061 = torch.operator "onnx.Slice"(%2038, %2057, %2060, %2040) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2062 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2063 = torch.operator "onnx.Mul"(%2046, %2062) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2064 = torch.operator "onnx.Slice"(%2038, %2060, %2063, %2040) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.22Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.2/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %2065 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.22Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.22Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.2/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %2066 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.22Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %2067 = torch.operator "onnx.LayerNormalization"(%2001, %2065, %2066) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2068 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ---- transformer_blocks.2 norm1 modulation, ops %2068..%2079 ---------------
// The line above completes the axes constant %2068 (started earlier).
//
// %2069..%2075 : modulate the LayerNorm output %2067: multiply by
//                (unsqueezed %2052 + %2070), then add the unsqueezed %2049.
//                FIX: the rank-0 constant %2070 had its value attribute typed
//                as a bare `tensor`, which is not valid builtin tensor syntax;
//                restored to `tensor<bf16>` to match its declared
//                `!torch.vtensor<[],bf16>` result.
%2069 = torch.operator "onnx.Unsqueeze"(%2052, %2068) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2070 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2071 = torch.operator "onnx.Add"(%2069, %2070) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%2072 = torch.operator "onnx.Mul"(%2067, %2071) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2073 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2074 = torch.operator "onnx.Unsqueeze"(%2049, %2073) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2075 = torch.operator "onnx.Add"(%2072, %2074) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// %2076..%2079 : Gemm of %1285 with weight %56 (transB = 1) and bias %57,
//                producing a [1,18432] tensor, then read an entry of its shape.
//                The result type of the Gather %2079 continues after this span.
%2076 = torch.operator "onnx.Gemm"(%1285, %56, %57) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16>
%2077 = torch.operator "onnx.Shape"(%2076) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64>
%2078 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2079 = torch.operator "onnx.Gather"(%2077, %2078) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) ->
!torch.vtensor<[1],si64> %2080 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2081 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2082 = torch.operator "onnx.Add"(%2079, %2081) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2083 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2084 = torch.operator "onnx.Div"(%2082, %2083) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2085 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2086 = torch.operator "onnx.Mul"(%2084, %2085) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2087 = torch.operator "onnx.Slice"(%2076, %2080, %2086, %2078) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2088 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2089 = torch.operator "onnx.Mul"(%2084, %2088) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2090 = torch.operator "onnx.Slice"(%2076, %2086, %2089, %2078) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2091 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.2_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2092 = torch.operator "onnx.Mul"(%2084, %2091) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2093 = torch.operator "onnx.Slice"(%2076, %2089, %2092, %2078) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2094 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2095 = torch.operator "onnx.Mul"(%2084, %2094) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2096 = torch.operator "onnx.Slice"(%2076, %2092, %2095, %2078) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2097 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2098 = torch.operator "onnx.Mul"(%2084, %2097) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2099 = torch.operator "onnx.Slice"(%2076, %2095, %2098, %2078) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2100 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2101 = torch.operator "onnx.Mul"(%2084, %2100) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2102 = torch.operator "onnx.Slice"(%2076, %2098, %2101, %2078) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Load the two [3072] bf16 operands that are passed to the LayerNormalization below.
%_2Ftransformer_blocks.22Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.2/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16>
%2103 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.22Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.22Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.2/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16>
%2104 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.22Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%2105 = torch.operator "onnx.LayerNormalization"(%2037, %2103, %2104) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2106 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2107 = torch.operator "onnx.Unsqueeze"(%2090, %2106) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constant: the resource type is spelled tensor<bf16> (a bare `tensor` is not a valid type);
// the element type is taken from this op's !torch.vtensor<[],bf16> result.
%2108 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2109 = torch.operator "onnx.Add"(%2107, %2108) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
// Multiply the LayerNorm output %2105 by %2109 (broadcast over the 512 axis).
%2110 = torch.operator "onnx.Mul"(%2105, %2109) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2111 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2112 =
torch.operator "onnx.Unsqueeze"(%2087, %2111) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2113 = torch.operator "onnx.Add"(%2110, %2112) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2114 = torch.operator "onnx.Shape"(%2113) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
// Rank-0 index constant: the resource type is spelled tensor<si64> (a bare `tensor` is not a valid type);
// the element type is taken from this op's !torch.vtensor<[],si64> result.
%2115 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2116 = torch.operator "onnx.Gather"(%2114, %2115) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three [3072,3072] MatMuls over %2075, each followed by a [3072] bias Add (%60, %61, %62).
%2117 = torch.operator "onnx.MatMul"(%2075, %765) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2118 = torch.operator "onnx.Add"(%60, %2117) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2119 = torch.operator "onnx.MatMul"(%2075, %766) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2120 = torch.operator "onnx.Add"(%61, %2119) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2121 = torch.operator "onnx.MatMul"(%2075, %767) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2122 = torch.operator "onnx.Add"(%62, %2121) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2123 = torch.operator "onnx.Shape"(%2120) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
// Rank-0 index constant, same tensor<si64> spelling as %2115.
%2124 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2125 = torch.operator "onnx.Gather"(%2123, %2124)
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Rank-0 divisor constant: the resource type is spelled tensor<si64> (a bare `tensor` is not a valid type);
// the element type is taken from this op's !torch.vtensor<[],si64> result.
%2126 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2127 = torch.operator "onnx.Div"(%2125, %2126) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Both casts are si64 -> si64; they are kept as-is because %2129 is referenced by later Unsqueeze ops.
%2128 = torch.operator "onnx.Cast"(%2127) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2129 = torch.operator "onnx.Cast"(%2128) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6672_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2131 = torch.operator "onnx.Unsqueeze"(%2116, %2130) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2132 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2133 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2134 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6676_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2135 = torch.operator "onnx.Unsqueeze"(%2129, %2134) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// 4-element shape vector assembled from %2131, two constants, and %2135; consumed by a later Reshape.
%2136 = torch.operator "onnx.Concat"(%2131, %2132, %2133, %2135) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%2137 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6679_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2138 =
torch.operator "onnx.Unsqueeze"(%2116, %2137) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2139 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2140 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2141 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6683_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2142 = torch.operator "onnx.Unsqueeze"(%2129, %2141) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2143 = torch.operator "onnx.Concat"(%2138, %2139, %2140, %2142) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6686_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2145 = torch.operator "onnx.Unsqueeze"(%2116, %2144) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2146 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2147 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2148 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6690_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2149 = torch.operator "onnx.Unsqueeze"(%2129, %2148) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2150 = torch.operator "onnx.Concat"(%2145, %2146, 
%2147, %2149) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2151 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6693_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2152 = torch.operator "onnx.Unsqueeze"(%2116, %2151) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2153 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2154 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2155 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6697_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2156 = torch.operator "onnx.Unsqueeze"(%2129, %2155) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2157 = torch.operator "onnx.Concat"(%2152, %2153, %2154, %2156) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2158 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6700_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2159 = torch.operator "onnx.Unsqueeze"(%2116, %2158) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2160 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2161 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%2162 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6704_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2163 = torch.operator "onnx.Unsqueeze"(%2129, %2162) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2164 = torch.operator "onnx.Concat"(%2159, %2160, %2161, %2163) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2165 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6707_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2166 = torch.operator "onnx.Unsqueeze"(%2116, %2165) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2167 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2168 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2169 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6711_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2170 = torch.operator "onnx.Unsqueeze"(%2129, %2169) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2171 = torch.operator "onnx.Concat"(%2166, %2167, %2168, %2170) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2172 = torch.operator "onnx.Reshape"(%2118, %2136) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2173 = torch.operator "onnx.Transpose"(%2172) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Reshape each projection to rank 4 and swap axes 1 and 2 (perm = [0, 2, 1, 3]).
%2174 = torch.operator "onnx.Reshape"(%2120, %2143) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%2175 = torch.operator "onnx.Transpose"(%2174) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%2176 = torch.operator "onnx.Reshape"(%2122, %2150) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%2177 = torch.operator "onnx.Transpose"(%2176) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// norm_q chain, computed in f32: Cast -> Pow -> ReduceMean(axis -1, keepdims) -> Add -> Sqrt.
%2178 = torch.operator "onnx.Cast"(%2173) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// Rank-0 constants below: the resource type is spelled tensor<f32> (a bare `tensor` is not a valid type);
// the element type is taken from each op's !torch.vtensor<[],f32> result.
%2179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2180 = torch.operator "onnx.Pow"(%2178, %2179) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%2181 = torch.operator "onnx.ReduceMean"(%2180) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2182 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2183 = torch.operator "onnx.Add"(%2181, %2182) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2184 = torch.operator "onnx.Sqrt"(%2183) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// The result type of %2185 continues on the following line as !torch.vtensor<[],f32>.
%2185 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () ->
!torch.vtensor<[],f32>
// Finish the norm_q chain: divide scalar %2185 by %2184, multiply into the f32 copy of %2173,
// cast back to bf16 (to = 16) and multiply by the [128] operand %58.
%2186 = torch.operator "onnx.Div"(%2185, %2184) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2187 = torch.operator "onnx.Cast"(%2173) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%2188 = torch.operator "onnx.Mul"(%2187, %2186) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%2189 = torch.operator "onnx.Cast"(%2188) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%2190 = torch.operator "onnx.Mul"(%2189, %58) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// norm_k chain on %2175, same op sequence as norm_q.
%2191 = torch.operator "onnx.Cast"(%2175) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// Rank-0 constants below: the resource type is spelled tensor<f32> (a bare `tensor` is not a valid type);
// the element type is taken from each op's !torch.vtensor<[],f32> result.
%2192 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2193 = torch.operator "onnx.Pow"(%2191, %2192) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%2194 = torch.operator "onnx.ReduceMean"(%2193) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2195 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2196 = torch.operator "onnx.Add"(%2194, %2195) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2197 = torch.operator "onnx.Sqrt"(%2196) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2198 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2199 = torch.operator "onnx.Div"(%2198,
%2197) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %2200 = torch.operator "onnx.Cast"(%2175) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %2201 = torch.operator "onnx.Mul"(%2200, %2199) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %2202 = torch.operator "onnx.Cast"(%2201) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %2203 = torch.operator "onnx.Mul"(%2202, %59) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %2204 = torch.operator "onnx.MatMul"(%2113, %768) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2205 = torch.operator "onnx.Add"(%65, %2204) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2206 = torch.operator "onnx.MatMul"(%2113, %769) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2207 = torch.operator "onnx.Add"(%63, %2206) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2208 = torch.operator "onnx.MatMul"(%2113, %770) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2209 = torch.operator "onnx.Add"(%64, %2208) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2210 = torch.operator "onnx.Reshape"(%2205, %2157) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2211 = torch.operator "onnx.Transpose"(%2210) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2212 = torch.operator "onnx.Reshape"(%2207, 
%2164) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// Swap axes 1 and 2 of each reshaped [?,512,3072] projection (perm = [0, 2, 1, 3]).
%2213 = torch.operator "onnx.Transpose"(%2212) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%2214 = torch.operator "onnx.Reshape"(%2209, %2171) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%2215 = torch.operator "onnx.Transpose"(%2214) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// norm_added_q chain on %2211, computed in f32: Cast -> Pow -> ReduceMean(axis -1, keepdims) -> Add -> Sqrt -> Div.
%2216 = torch.operator "onnx.Cast"(%2211) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// Rank-0 constants below: the resource type is spelled tensor<f32> (a bare `tensor` is not a valid type);
// the element type is taken from each op's !torch.vtensor<[],f32> result.
%2217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2218 = torch.operator "onnx.Pow"(%2216, %2217) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%2219 = torch.operator "onnx.ReduceMean"(%2218) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2220 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2221 = torch.operator "onnx.Add"(%2219, %2220) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2222 = torch.operator "onnx.Sqrt"(%2221) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2223 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2224 = torch.operator "onnx.Div"(%2223, %2222) : (!torch.vtensor<[],f32>,
!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Finish the norm_added_q chain: multiply the f32 copy of %2211 by %2224, cast back to bf16 (to = 16),
// then multiply by the [128] operand %68.
%2225 = torch.operator "onnx.Cast"(%2211) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%2226 = torch.operator "onnx.Mul"(%2225, %2224) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%2227 = torch.operator "onnx.Cast"(%2226) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%2228 = torch.operator "onnx.Mul"(%2227, %68) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// norm_added_k chain on %2213, same op sequence.
%2229 = torch.operator "onnx.Cast"(%2213) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// Rank-0 constants below: the resource type is spelled tensor<f32> (a bare `tensor` is not a valid type);
// the element type is taken from each op's !torch.vtensor<[],f32> result.
%2230 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2231 = torch.operator "onnx.Pow"(%2229, %2230) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%2232 = torch.operator "onnx.ReduceMean"(%2231) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2233 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2234 = torch.operator "onnx.Add"(%2232, %2233) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2235 = torch.operator "onnx.Sqrt"(%2234) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%2236 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%2237 = torch.operator "onnx.Div"(%2236, %2235) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) ->
!torch.vtensor<[?,?,?,1],f32>
// Finish the norm_added_k chain: multiply the f32 copy of %2213 by %2237, cast back to bf16 (to = 16),
// then multiply by the [128] operand %69.
%2238 = torch.operator "onnx.Cast"(%2213) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%2239 = torch.operator "onnx.Mul"(%2238, %2237) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%2240 = torch.operator "onnx.Cast"(%2239) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%2241 = torch.operator "onnx.Mul"(%2240, %69) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenate along axis 2; in each pair the value derived from the [?,512,3072] stream comes first.
%2242 = torch.operator "onnx.Concat"(%2228, %2190) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%2243 = torch.operator "onnx.Concat"(%2241, %2203) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%2244 = torch.operator "onnx.Concat"(%2215, %2177) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%2245 = torch.operator "onnx.Shape"(%2242) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
// Rank-0 index constants below: the resource type is spelled tensor<si64> (a bare `tensor` is not a valid type);
// the element type is taken from each op's !torch.vtensor<[],si64> result.
%2246 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2247 = torch.operator "onnx.Gather"(%2245, %2246) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2248 = torch.operator "onnx.Shape"(%2242) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%2249 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2250 = torch.operator "onnx.Gather"(%2248, %2249) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>,
!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2251 = torch.operator "onnx.Shape"(%2242) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
// Rank-0 index constant: the resource type is spelled tensor<si64> (a bare `tensor` is not a valid type);
// the element type is taken from this op's !torch.vtensor<[],si64> result.
%2252 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2253 = torch.operator "onnx.Gather"(%2251, %2252) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Unsqueeze the three gathered scalars to [1] so they can be concatenated into a shape vector.
%2254 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6796_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2255 = torch.operator "onnx.Unsqueeze"(%2247, %2254) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2256 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6798_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2257 = torch.operator "onnx.Unsqueeze"(%2250, %2256) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2258 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6800_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2259 = torch.operator "onnx.Unsqueeze"(%2253, %2258) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2260 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2261 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// 5-element shape vector: three dims read from %2242 plus two constants.
%2262 = torch.operator "onnx.Concat"(%2255, %2257, %2259, %2260, %2261) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>
%2263 =
torch.operator "onnx.Reshape"(%2242, %2262) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %2264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %2265:2 = torch.operator "onnx.Split"(%2263, %2264) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %2266 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2267 = torch.operator "onnx.Squeeze"(%2265#0, %2266) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2268 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2269 = torch.operator "onnx.Squeeze"(%2265#1, %2268) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2270 = torch.operator "onnx.Neg"(%2269) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2271 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2272 = torch.operator "onnx.Unsqueeze"(%2270, %2271) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %2273 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2274 = torch.operator "onnx.Unsqueeze"(%2267, %2273) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?,1],bf16> %2275 = torch.operator "onnx.Concat"(%2272, %2274) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %2276 = torch.operator "onnx.Shape"(%2275) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %2277 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2278 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2279 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2280 = torch.operator "onnx.Slice"(%2276, %2278, %2279, %2277) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %2281 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2282 = torch.operator "onnx.Concat"(%2280, %2281) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2283 = torch.operator "onnx.Reshape"(%2275, %2282) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2284 = torch.operator "onnx.Cast"(%2242) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %2285 = torch.operator "onnx.Mul"(%2284, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %2286 = torch.operator "onnx.Cast"(%2283) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],f32>
// %2287/%2288 combine two f32 element-wise products with an Add and %2289 casts
// the sum to bf16. Shape(%2243) is then read three times; each Gather picks one
// scalar dimension using a rank-0 index constant. Rank-0 constants are typed
// tensor<si64> so the attribute type agrees with the !torch.vtensor<[],si64>
// result of the same op.
%2287 = torch.operator "onnx.Mul"(%2286, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%2288 = torch.operator "onnx.Add"(%2285, %2287) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%2289 = torch.operator "onnx.Cast"(%2288) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
%2290 = torch.operator "onnx.Shape"(%2243) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%2291 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2292 = torch.operator "onnx.Gather"(%2290, %2291) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2293 = torch.operator "onnx.Shape"(%2243) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%2294 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2295 = torch.operator "onnx.Gather"(%2293, %2294) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2296 = torch.operator "onnx.Shape"(%2243) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%2297 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2298 = torch.operator "onnx.Gather"(%2296, %2297) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2299 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6841_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2300 = torch.operator
"onnx.Unsqueeze"(%2292, %2299) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2301 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6843_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2302 = torch.operator "onnx.Unsqueeze"(%2295, %2301) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2303 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6845_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2304 = torch.operator "onnx.Unsqueeze"(%2298, %2303) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2305 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2306 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2307 = torch.operator "onnx.Concat"(%2300, %2302, %2304, %2305, %2306) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %2308 = torch.operator "onnx.Reshape"(%2243, %2307) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %2309 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %2310:2 = torch.operator "onnx.Split"(%2308, %2309) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %2311 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.2_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2312 = torch.operator "onnx.Squeeze"(%2310#0, %2311) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2313 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2314 = torch.operator "onnx.Squeeze"(%2310#1, %2313) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2315 = torch.operator "onnx.Neg"(%2314) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2316 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2317 = torch.operator "onnx.Unsqueeze"(%2315, %2316) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %2318 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2319 = torch.operator "onnx.Unsqueeze"(%2312, %2318) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %2320 = torch.operator "onnx.Concat"(%2317, %2319) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %2321 = torch.operator "onnx.Shape"(%2320) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %2322 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2323 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_40_attr__value> : tensor<1xsi64>} : 
() -> !torch.vtensor<[1],si64> %2324 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2325 = torch.operator "onnx.Slice"(%2321, %2323, %2324, %2322) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %2326 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2327 = torch.operator "onnx.Concat"(%2325, %2326) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2328 = torch.operator "onnx.Reshape"(%2320, %2327) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2329 = torch.operator "onnx.Cast"(%2243) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %2330 = torch.operator "onnx.Mul"(%2329, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %2331 = torch.operator "onnx.Cast"(%2328) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %2332 = torch.operator "onnx.Mul"(%2331, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %2333 = torch.operator "onnx.Add"(%2330, %2332) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %2334 = torch.operator "onnx.Cast"(%2333) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %2335 = torch.operator "onnx.Shape"(%2289) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %2336 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// Attention scale: one entry is sliced out of the shape vector %2335, cast to
// bf16 and square-rooted; Constant_45 is divided by it (in f32) to give %2344.
// NOTE(review): which shape entry is sliced depends on Constant_43/44, whose
// values are in resource blobs not visible here.
%2337 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2338 = torch.operator "onnx.Slice"(%2335, %2336, %2337) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2339 = torch.operator "onnx.Cast"(%2338) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16>
%2340 = torch.operator "onnx.Sqrt"(%2339) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%2341 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
%2342 = torch.operator "onnx.Cast"(%2340) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32>
%2343 = torch.operator "onnx.Div"(%2341, %2342) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
%2344 = torch.operator "onnx.Cast"(%2343) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>
// sqrt(%2344) is applied to both MatMul operands: %2289 directly and %2334
// after swapping its last two axes.
%2345 = torch.operator "onnx.Transpose"(%2334) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
%2346 = torch.operator "onnx.Sqrt"(%2344) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%2347 = torch.operator "onnx.Mul"(%2289, %2346) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%2348 = torch.operator "onnx.Sqrt"(%2344) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%2349 = torch.operator "onnx.Mul"(%2345, %2348) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Scores -> softmax over the last axis -> weighted sum with %2244, then the
// two middle axes are swapped (perm [0, 2, 1, 3]).
%2350 = torch.operator "onnx.MatMul"(%2347, %2349) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%2351 = torch.operator "onnx.Softmax"(%2350) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%2352 = torch.operator "onnx.MatMul"(%2351, %2244) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
%2353 = torch.operator "onnx.Transpose"(%2352) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// Target shape for the rank-3 Reshape: [%2116, Constant_47, %2129 * Constant_46].
// Constant_46 is a rank-0 si64 value, hence tensor<si64>.
%2354 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_46_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2355 = torch.operator "onnx.Mul"(%2129, %2354) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2356 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6898_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2357 = torch.operator "onnx.Unsqueeze"(%2116, %2356) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2358 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2359 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_6901_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2360 = torch.operator "onnx.Unsqueeze"(%2355, %2359) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2361 = torch.operator "onnx.Concat"(%2357, %2358, %2360) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%2362 = torch.operator "onnx.Reshape"(%2353, %2361) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
// This Cast has identical operand and result types (bf16 -> bf16); it is kept
// exactly as exported.
%2363 = torch.operator "onnx.Cast"(%2362) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
// One dimension of %2113 (selected by the rank-0 index Constant_48) is used as
// the end bound of the Slice below.
%2364 = torch.operator "onnx.Shape"(%2113) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%2365 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_48_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%2366 = torch.operator "onnx.Gather"(%2364, %2365) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%2367 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2368 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2369 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2370 = torch.operator "onnx.Unsqueeze"(%2366, %2369) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2371 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2372 = torch.operator "onnx.Slice"(%2363, %2368, %2370, %2367, %2371) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16>
%2373 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2374 = torch.operator "onnx.Constant"()
{torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2375 = torch.operator "onnx.Unsqueeze"(%2366, %2374) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2376 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2377 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2378 = torch.operator "onnx.Slice"(%2363, %2375, %2376, %2373, %2377) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %2379 = torch.operator "onnx.MatMul"(%2378, %771) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2380 = torch.operator "onnx.Add"(%66, %2379) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2381 = torch.operator "onnx.MatMul"(%2372, %772) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2382 = torch.operator "onnx.Add"(%67, %2381) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2383 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2384 = torch.operator "onnx.Unsqueeze"(%2055, %2383) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2385 = torch.operator "onnx.Mul"(%2384, %2380) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2386 = torch.operator "onnx.Add"(%2001, %2385) : 
(!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// LayerNormalization of %2386 over the last axis, with its scale and bias
// operands loaded from the two norm2 util.globals.
%_2Ftransformer_blocks.22Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.2/norm2/Constant_attr__value" : tensor<3072xbf16>
%2387 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.22Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.22Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.2/norm2/Constant_1_attr__value" : tensor<3072xbf16>
%2388 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.22Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%2389 = torch.operator "onnx.LayerNormalization"(%2386, %2387, %2388) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// The normalized tensor is multiplied by (unsqueeze(%2061) + scalar). The
// scalar is a rank-0 bf16 constant, hence tensor<bf16>.
%2390 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2391 = torch.operator "onnx.Unsqueeze"(%2061, %2390) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2392 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2393 = torch.operator "onnx.Add"(%2391, %2392) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%2394 = torch.operator "onnx.Mul"(%2389, %2393) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2395 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2396 = torch.operator "onnx.Unsqueeze"(%2058, %2395) : (!torch.vtensor<[?,?],bf16>,
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// %2397 adds the unsqueezed %2058 to %2394; the sum is projected to width
// 12288 (MatMul with %773, plus bias %70), giving x = %2399.
%2397 = torch.operator "onnx.Add"(%2394, %2396) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2398 = torch.operator "onnx.MatMul"(%2397, %773) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2399 = torch.operator "onnx.Add"(%70, %2398) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Element-wise chain on x: c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))), where
// c0..c3 are the rank-0 bf16 constants ff_net.0 Constant .. Constant_3 (typed
// tensor<bf16>).
// NOTE(review): this has the shape of the tanh-approximated GELU; the constant
// values are stored in resource blobs not visible here, so confirm before
// relying on that.
%2400 = torch.operator "onnx.Mul"(%2399, %2399) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2401 = torch.operator "onnx.Mul"(%2399, %2400) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2402 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2403 = torch.operator "onnx.Mul"(%2402, %2401) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2404 = torch.operator "onnx.Add"(%2399, %2403) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2405 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2406 = torch.operator "onnx.Mul"(%2405, %2404) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2407 = torch.operator "onnx.Tanh"(%2406) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2408 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2409 = torch.operator "onnx.Add"(%2408, %2407) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2410 = torch.operator "onnx.Mul"(%2399, %2409) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%2411 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2412 = torch.operator "onnx.Mul"(%2411, %2410) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Project back to width 3072 (MatMul with %774, plus bias %71), scale by the
// unsqueezed %2064 and add the result onto %2386.
%2413 = torch.operator "onnx.MatMul"(%2412, %774) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2414 = torch.operator "onnx.Add"(%71, %2413) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2415 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2416 = torch.operator "onnx.Unsqueeze"(%2064, %2415) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2417 = torch.operator "onnx.Mul"(%2416, %2414) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%2418 = torch.operator "onnx.Add"(%2386, %2417) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// %2382 is scaled by the unsqueezed %2093.
%2419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2420 = torch.operator "onnx.Unsqueeze"(%2093, %2419) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2421 = torch.operator "onnx.Mul"(%2420, %2382) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>)
-> !torch.vtensor<[?,?,3072],bf16>
// %2422 adds %2421 onto %2037; the sum is layer-normalized over the last axis
// with scale and bias operands loaded from the norm2_context util.globals.
%2422 = torch.operator "onnx.Add"(%2037, %2421) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%_2Ftransformer_blocks.22Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.2/norm2_context/Constant_attr__value" : tensor<3072xbf16>
%2423 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.22Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.22Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.2/norm2_context/Constant_1_attr__value" : tensor<3072xbf16>
%2424 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.22Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%2425 = torch.operator "onnx.LayerNormalization"(%2422, %2423, %2424) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// The normalized tensor is multiplied by (unsqueeze(%2099) + scalar). The
// scalar is a rank-0 bf16 constant, hence tensor<bf16>.
%2426 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2427 = torch.operator "onnx.Unsqueeze"(%2099, %2426) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2428 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2429 = torch.operator "onnx.Add"(%2427, %2428) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%2430 = torch.operator "onnx.Mul"(%2425, %2429) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2431 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_8_attr__value> :
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// %2433 adds the unsqueezed %2096 to %2430; the sum is projected to width
// 12288 (MatMul with %775, plus bias %72), giving x = %2435.
%2432 = torch.operator "onnx.Unsqueeze"(%2096, %2431) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2433 = torch.operator "onnx.Add"(%2430, %2432) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2434 = torch.operator "onnx.MatMul"(%2433, %775) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2435 = torch.operator "onnx.Add"(%72, %2434) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// First half of the element-wise chain on x: tanh(c1 * (x + c0 * x^3)), where
// c0 and c1 are rank-0 bf16 constants (typed tensor<bf16>).
%2436 = torch.operator "onnx.Mul"(%2435, %2435) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2437 = torch.operator "onnx.Mul"(%2435, %2436) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2438 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2439 = torch.operator "onnx.Mul"(%2438, %2437) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2440 = torch.operator "onnx.Add"(%2435, %2439) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2441 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2442 = torch.operator "onnx.Mul"(%2441, %2440) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2443 = torch.operator "onnx.Tanh"(%2442) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2444 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.2_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
// Second half of the element-wise chain: c3 * %2435 * (c2 + %2443), with c2
// and c3 rank-0 bf16 constants (typed tensor<bf16>).
%2445 = torch.operator "onnx.Add"(%2444, %2443) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2446 = torch.operator "onnx.Mul"(%2435, %2445) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%2447 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2448 = torch.operator "onnx.Mul"(%2447, %2446) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// Project back to width 3072 (MatMul with %776, plus bias %73), scale by the
// unsqueezed %2102 and add the result onto %2422.
%2449 = torch.operator "onnx.MatMul"(%2448, %776) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2450 = torch.operator "onnx.Add"(%73, %2449) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2451 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.2_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2452 = torch.operator "onnx.Unsqueeze"(%2102, %2451) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2453 = torch.operator "onnx.Mul"(%2452, %2450) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%2454 = torch.operator "onnx.Add"(%2422, %2453) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Gemm of %1285 against the transposed [18432,3072] weight %74 with bias %75,
// producing a [1,18432] tensor.
%2455 = torch.operator "onnx.Gemm"(%1285, %74, %75) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16>
%2456 = torch.operator "onnx.Shape"(%2455) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %2457 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2458 = torch.operator "onnx.Gather"(%2456, %2457) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2459 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2460 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2461 = torch.operator "onnx.Add"(%2458, %2460) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2462 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2463 = torch.operator "onnx.Div"(%2461, %2462) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2464 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2465 = torch.operator "onnx.Mul"(%2463, %2464) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2466 = torch.operator "onnx.Slice"(%2455, %2459, %2465, %2457) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2467 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2468 = torch.operator "onnx.Mul"(%2463, %2467) : 
(!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2469 = torch.operator "onnx.Slice"(%2455, %2465, %2468, %2457) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2470 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2471 = torch.operator "onnx.Mul"(%2463, %2470) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2472 = torch.operator "onnx.Slice"(%2455, %2468, %2471, %2457) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2473 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2474 = torch.operator "onnx.Mul"(%2463, %2473) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2475 = torch.operator "onnx.Slice"(%2455, %2471, %2474, %2457) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2476 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2477 = torch.operator "onnx.Mul"(%2463, %2476) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2478 = torch.operator "onnx.Slice"(%2455, %2474, %2477, %2457) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2479 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64>
// %2481 is the Slice of the Gemm output %2455 bounded by %2477 and %2480.
%2480 = torch.operator "onnx.Mul"(%2463, %2479) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%2481 = torch.operator "onnx.Slice"(%2455, %2477, %2480, %2457) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// LayerNormalization of %2418 over the last axis, with its scale and bias
// operands loaded from the transformer_blocks.3 norm1/norm util.globals.
%_2Ftransformer_blocks.32Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.3/norm1/norm/Constant_attr__value" : tensor<3072xbf16>
%2482 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.32Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.32Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.3/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
%2483 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.32Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%2484 = torch.operator "onnx.LayerNormalization"(%2418, %2482, %2483) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// The normalized tensor is multiplied by (unsqueeze(%2469) + scalar). The
// scalar is a rank-0 bf16 constant, hence tensor<bf16>.
%2485 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%2486 = torch.operator "onnx.Unsqueeze"(%2469, %2485) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%2487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%2488 = torch.operator "onnx.Add"(%2486, %2487) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%2489 = torch.operator "onnx.Mul"(%2484, %2488) : (!torch.vtensor<[?,4096,3072],bf16>,
!torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2490 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2491 = torch.operator "onnx.Unsqueeze"(%2466, %2490) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2492 = torch.operator "onnx.Add"(%2489, %2491) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2493 = torch.operator "onnx.Gemm"(%1285, %76, %77) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %2494 = torch.operator "onnx.Shape"(%2493) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %2495 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2496 = torch.operator "onnx.Gather"(%2494, %2495) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2497 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2498 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2499 = torch.operator "onnx.Add"(%2496, %2498) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2501 = 
torch.operator "onnx.Div"(%2499, %2500) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2502 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2503 = torch.operator "onnx.Mul"(%2501, %2502) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2504 = torch.operator "onnx.Slice"(%2493, %2497, %2503, %2495) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2505 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2506 = torch.operator "onnx.Mul"(%2501, %2505) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2507 = torch.operator "onnx.Slice"(%2493, %2503, %2506, %2495) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2508 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2509 = torch.operator "onnx.Mul"(%2501, %2508) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2510 = torch.operator "onnx.Slice"(%2493, %2506, %2509, %2495) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2511 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2512 = torch.operator "onnx.Mul"(%2501, %2511) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %2513 = torch.operator "onnx.Slice"(%2493, %2509, %2512, %2495) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2514 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2515 = torch.operator "onnx.Mul"(%2501, %2514) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2516 = torch.operator "onnx.Slice"(%2493, %2512, %2515, %2495) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2517 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2518 = torch.operator "onnx.Mul"(%2501, %2517) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2519 = torch.operator "onnx.Slice"(%2493, %2515, %2518, %2495) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.32Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.3/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %2520 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.32Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.32Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.3/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %2521 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.32Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %2522 = torch.operator "onnx.LayerNormalization"(%2454, 
%2520, %2521) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Give the 2-D slice %2507 a unit middle dimension so it broadcasts against %2522.
    %2523 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2524 = torch.operator "onnx.Unsqueeze"(%2507, %2523) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    // Scalar constant: the attribute type is the rank-0 tensor<bf16>, matching the
    // !torch.vtensor<[],bf16> result (a bare `tensor` is not a valid MLIR type).
    // NOTE(review): the value lives in the resource blob; presumably 1.0 - confirm.
    %2525 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    // %2527 = %2522 * (%2524 + %2525).
    %2526 = torch.operator "onnx.Add"(%2524, %2525) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %2527 = torch.operator "onnx.Mul"(%2522, %2526) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // %2530 = %2527 + unsqueeze(%2504).
    %2528 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2529 = torch.operator "onnx.Unsqueeze"(%2504, %2528) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %2530 = torch.operator "onnx.Add"(%2527, %2529) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Read one entry of %2530's shape as a scalar. The index constant is a rank-0
    // si64 tensor, so its attribute type is tensor<si64>.
    %2531 = torch.operator "onnx.Shape"(%2530) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
    %2532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2533 = torch.operator "onnx.Gather"(%2531, %2532) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2534 = torch.operator "onnx.MatMul"(%2492, %777) :
(!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // Three MatMul + bias-Add projections of %2492:
    // weights %777 / %778 / %779, biases %80 / %81 / %82.
    %2535 = torch.operator "onnx.Add"(%80, %2534) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %2536 = torch.operator "onnx.MatMul"(%2492, %778) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %2537 = torch.operator "onnx.Add"(%81, %2536) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %2538 = torch.operator "onnx.MatMul"(%2492, %779) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %2539 = torch.operator "onnx.Add"(%82, %2538) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // %2544 = (entry of %2537's shape selected by %2541) / %2543.
    // Both scalar constants are rank-0 si64 tensors, so their attribute type is
    // tensor<si64> (a bare `tensor` is not a valid MLIR type).
    %2540 = torch.operator "onnx.Shape"(%2537) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
    %2541 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2542 = torch.operator "onnx.Gather"(%2540, %2541) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2543 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2544 = torch.operator "onnx.Div"(%2542, %2543) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // Two back-to-back Casts with to = 7; operand and result are both si64, so the type is unchanged.
    %2545 = torch.operator "onnx.Cast"(%2544) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2546 = torch.operator "onnx.Cast"(%2545) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2547 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7089_attr__value> : tensor<1xsi64>} :
() -> !torch.vtensor<[1],si64> %2548 = torch.operator "onnx.Unsqueeze"(%2533, %2547) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2549 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2550 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2551 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7093_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2552 = torch.operator "onnx.Unsqueeze"(%2546, %2551) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2553 = torch.operator "onnx.Concat"(%2548, %2549, %2550, %2552) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2554 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7096_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2555 = torch.operator "onnx.Unsqueeze"(%2533, %2554) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2556 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2557 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2558 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7100_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2559 = torch.operator "onnx.Unsqueeze"(%2546, %2558) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2560 = 
torch.operator "onnx.Concat"(%2555, %2556, %2557, %2559) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2561 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7103_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2562 = torch.operator "onnx.Unsqueeze"(%2533, %2561) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2563 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2564 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2565 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7107_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2566 = torch.operator "onnx.Unsqueeze"(%2546, %2565) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2567 = torch.operator "onnx.Concat"(%2562, %2563, %2564, %2566) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2568 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7110_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2569 = torch.operator "onnx.Unsqueeze"(%2533, %2568) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2570 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2571 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_10_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2572 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7114_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2573 = torch.operator "onnx.Unsqueeze"(%2546, %2572) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2574 = torch.operator "onnx.Concat"(%2569, %2570, %2571, %2573) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2575 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7117_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2576 = torch.operator "onnx.Unsqueeze"(%2533, %2575) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2577 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2578 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2579 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7121_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2580 = torch.operator "onnx.Unsqueeze"(%2546, %2579) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2581 = torch.operator "onnx.Concat"(%2576, %2577, %2578, %2580) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2582 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7124_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2583 = torch.operator "onnx.Unsqueeze"(%2533, %2582) : (!torch.vtensor<[],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2584 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2585 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2586 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7128_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2587 = torch.operator "onnx.Unsqueeze"(%2546, %2586) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2588 = torch.operator "onnx.Concat"(%2583, %2584, %2585, %2587) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2589 = torch.operator "onnx.Reshape"(%2535, %2553) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2590 = torch.operator "onnx.Transpose"(%2589) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2591 = torch.operator "onnx.Reshape"(%2537, %2560) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2592 = torch.operator "onnx.Transpose"(%2591) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2593 = torch.operator "onnx.Reshape"(%2539, %2567) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2594 = torch.operator "onnx.Transpose"(%2593) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],bf16>
    // attn.norm_q on %2590, computed in f32 and cast back to bf16:
    //   %2605 = cast(%2590) * (%2602 / sqrt(mean(pow(cast(%2590), %2596), last axis) + %2599))
    //   %2607 = cast_bf16(%2605) * %78
    // The three scalar constants are rank-0 f32 tensors, so their attribute type is
    // tensor<f32> (a bare `tensor` is not a valid MLIR type).
    // NOTE(review): constant values live in the resource blob; presumably exponent 2,
    // a small epsilon and numerator 1 - confirm.
    %2595 = torch.operator "onnx.Cast"(%2590) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %2596 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2597 = torch.operator "onnx.Pow"(%2595, %2596) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %2598 = torch.operator "onnx.ReduceMean"(%2597) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2599 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2600 = torch.operator "onnx.Add"(%2598, %2599) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2601 = torch.operator "onnx.Sqrt"(%2600) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2602 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2603 = torch.operator "onnx.Div"(%2602, %2601) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2604 = torch.operator "onnx.Cast"(%2590) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %2605 = torch.operator "onnx.Mul"(%2604, %2603) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %2606 = torch.operator "onnx.Cast"(%2605) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
    %2607 = torch.operator "onnx.Mul"(%2606, %78) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
    // attn.norm_k: the same op sequence applied to %2592, scaled by %79.
    %2608 = torch.operator "onnx.Cast"(%2592) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %2609 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2610 = torch.operator "onnx.Pow"(%2608, %2609) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %2611 = torch.operator "onnx.ReduceMean"(%2610) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2612 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2613 = torch.operator "onnx.Add"(%2611, %2612) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2614 = torch.operator "onnx.Sqrt"(%2613) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2615 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2616 = torch.operator "onnx.Div"(%2615, %2614) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2617 = torch.operator "onnx.Cast"(%2592) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %2618 = torch.operator "onnx.Mul"(%2617, %2616) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %2619 = torch.operator "onnx.Cast"(%2618) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
    %2620 = torch.operator "onnx.Mul"(%2619, %79) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
    %2621 = torch.operator "onnx.MatMul"(%2530, %780) : (!torch.vtensor<[?,512,3072],bf16>,
!torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2622 = torch.operator "onnx.Add"(%85, %2621) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2623 = torch.operator "onnx.MatMul"(%2530, %781) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2624 = torch.operator "onnx.Add"(%83, %2623) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2625 = torch.operator "onnx.MatMul"(%2530, %782) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2626 = torch.operator "onnx.Add"(%84, %2625) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2627 = torch.operator "onnx.Reshape"(%2622, %2574) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2628 = torch.operator "onnx.Transpose"(%2627) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2629 = torch.operator "onnx.Reshape"(%2624, %2581) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2630 = torch.operator "onnx.Transpose"(%2629) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2631 = torch.operator "onnx.Reshape"(%2626, %2588) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2632 = torch.operator "onnx.Transpose"(%2631) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2633 = torch.operator "onnx.Cast"(%2628) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    // attn.norm_added_q on %2628 (whose f32 cast is %2633), computed in f32 and cast back to bf16:
    //   %2643 = cast(%2628) * (%2640 / sqrt(mean(pow(%2633, %2634), last axis) + %2637))
    //   %2645 = cast_bf16(%2643) * %88
    // The three scalar constants are rank-0 f32 tensors, so their attribute type is
    // tensor<f32> (a bare `tensor` is not a valid MLIR type).
    // NOTE(review): constant values live in the resource blob; presumably exponent 2,
    // a small epsilon and numerator 1 - confirm.
    %2634 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2635 = torch.operator "onnx.Pow"(%2633, %2634) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %2636 = torch.operator "onnx.ReduceMean"(%2635) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2637 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2638 = torch.operator "onnx.Add"(%2636, %2637) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2639 = torch.operator "onnx.Sqrt"(%2638) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2640 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2641 = torch.operator "onnx.Div"(%2640, %2639) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2642 = torch.operator "onnx.Cast"(%2628) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %2643 = torch.operator "onnx.Mul"(%2642, %2641) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %2644 = torch.operator "onnx.Cast"(%2643) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
    %2645 = torch.operator "onnx.Mul"(%2644, %88) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
    // attn.norm_added_k: the same op sequence applied to %2630, scaled by %89.
    %2646 = torch.operator "onnx.Cast"(%2630) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %2647 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2648 = torch.operator "onnx.Pow"(%2646, %2647) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %2649 = torch.operator "onnx.ReduceMean"(%2648) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2650 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2651 = torch.operator "onnx.Add"(%2649, %2650) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2652 = torch.operator "onnx.Sqrt"(%2651) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2653 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %2654 = torch.operator "onnx.Div"(%2653, %2652) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %2655 = torch.operator "onnx.Cast"(%2630) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %2656 = torch.operator "onnx.Mul"(%2655, %2654) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %2657 = torch.operator "onnx.Cast"(%2656) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
    %2658 = torch.operator "onnx.Mul"(%2657, %89) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
    // Concatenate the two normalized tensors %2645 and %2607 along axis 2.
    %2659 = torch.operator "onnx.Concat"(%2645, %2607) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) ->
!torch.vtensor<[?,?,?,128],bf16>
    // Concatenate along axis 2: %2658 with %2620, and %2632 with %2594.
    %2660 = torch.operator "onnx.Concat"(%2658, %2620) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
    %2661 = torch.operator "onnx.Concat"(%2632, %2594) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
    // Read three entries of %2659's shape as scalars (%2664, %2667, %2670).
    // Each index constant is a rank-0 si64 tensor, so its attribute type is
    // tensor<si64> (a bare `tensor` is not a valid MLIR type).
    %2662 = torch.operator "onnx.Shape"(%2659) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %2663 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2664 = torch.operator "onnx.Gather"(%2662, %2663) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2665 = torch.operator "onnx.Shape"(%2659) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %2666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2667 = torch.operator "onnx.Gather"(%2665, %2666) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2668 = torch.operator "onnx.Shape"(%2659) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %2669 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2670 = torch.operator "onnx.Gather"(%2668, %2669) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // Turn the scalar %2664 into a 1-element tensor.
    %2671 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7213_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2672 = torch.operator "onnx.Unsqueeze"(%2664, %2671) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) ->
!torch.vtensor<[1],si64> %2673 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7215_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2674 = torch.operator "onnx.Unsqueeze"(%2667, %2673) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2675 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7217_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2676 = torch.operator "onnx.Unsqueeze"(%2670, %2675) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2677 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2678 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2679 = torch.operator "onnx.Concat"(%2672, %2674, %2676, %2677, %2678) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %2680 = torch.operator "onnx.Reshape"(%2659, %2679) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %2681 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %2682:2 = torch.operator "onnx.Split"(%2680, %2681) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %2683 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2684 = 
torch.operator "onnx.Squeeze"(%2682#0, %2683) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2685 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2686 = torch.operator "onnx.Squeeze"(%2682#1, %2685) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2687 = torch.operator "onnx.Neg"(%2686) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2688 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2689 = torch.operator "onnx.Unsqueeze"(%2687, %2688) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %2690 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2691 = torch.operator "onnx.Unsqueeze"(%2684, %2690) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %2692 = torch.operator "onnx.Concat"(%2689, %2691) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %2693 = torch.operator "onnx.Shape"(%2692) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %2694 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2695 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2696 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.3_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    // Take 3 entries of Shape(%2692), append one constant entry, and use the resulting
    // 4-element shape to reshape the [?,?,?,?,2] tensor %2692 back to rank 4.
    %2697 = torch.operator "onnx.Slice"(%2693, %2695, %2696, %2694) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
    %2698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2699 = torch.operator "onnx.Concat"(%2697, %2698) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
    %2700 = torch.operator "onnx.Reshape"(%2692, %2699) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
    // Widen %2659 and the reshaped %2700 to f32, multiply them by %1455 and %1456
    // respectively, add the two products, and narrow the sum back to bf16.
    // NOTE(review): looks like the rotary-embedding combine step - confirm against the exporter.
    %2701 = torch.operator "onnx.Cast"(%2659) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
    %2702 = torch.operator "onnx.Mul"(%2701, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %2703 = torch.operator "onnx.Cast"(%2700) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %2704 = torch.operator "onnx.Mul"(%2703, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %2705 = torch.operator "onnx.Add"(%2702, %2704) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %2706 = torch.operator "onnx.Cast"(%2705) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
    // The same shape bookkeeping now starts for %2660.
    %2707 = torch.operator "onnx.Shape"(%2660) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    // Scalar index constant. The attribute type is spelled out as tensor<si64> (a bare
    // `tensor` does not parse) to match this op's rank-0 si64 result type.
    %2708 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_29_attr__value> : tensor<si64>} : () -> 
!torch.vtensor<[],si64>
    // Read three scalar entries out of Shape(%2660). Each index is a rank-0 si64 constant,
    // so its attribute type is written as tensor<si64> (a bare `tensor` does not parse and
    // would not match the !torch.vtensor<[],si64> result).
    %2709 = torch.operator "onnx.Gather"(%2707, %2708) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2710 = torch.operator "onnx.Shape"(%2660) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %2711 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2712 = torch.operator "onnx.Gather"(%2710, %2711) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2713 = torch.operator "onnx.Shape"(%2660) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %2714 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2715 = torch.operator "onnx.Gather"(%2713, %2714) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // Promote each gathered scalar to a 1-element vector so the pieces can be concatenated
    // into a reshape target further down.
    %2716 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7258_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2717 = torch.operator "onnx.Unsqueeze"(%2709, %2716) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %2718 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7260_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2719 = torch.operator "onnx.Unsqueeze"(%2712, %2718) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %2720 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7262_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2721 = torch.operator "onnx.Unsqueeze"(%2715, %2720) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %2722 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.3_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2723 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2724 = torch.operator "onnx.Concat"(%2717, %2719, %2721, %2722, %2723) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %2725 = torch.operator "onnx.Reshape"(%2660, %2724) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %2726 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %2727:2 = torch.operator "onnx.Split"(%2725, %2726) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %2728 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2729 = torch.operator "onnx.Squeeze"(%2727#0, %2728) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2730 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2731 = torch.operator "onnx.Squeeze"(%2727#1, %2730) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2732 = torch.operator "onnx.Neg"(%2731) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %2733 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.3_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2734 = torch.operator "onnx.Unsqueeze"(%2732, %2733) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %2735 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2736 = torch.operator "onnx.Unsqueeze"(%2729, %2735) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %2737 = torch.operator "onnx.Concat"(%2734, %2736) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %2738 = torch.operator "onnx.Shape"(%2737) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %2739 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2740 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2741 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2742 = torch.operator "onnx.Slice"(%2738, %2740, %2741, %2739) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %2743 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2744 = torch.operator "onnx.Concat"(%2742, %2743) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2745 = 
torch.operator "onnx.Reshape"(%2737, %2744) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %2746 = torch.operator "onnx.Cast"(%2660) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %2747 = torch.operator "onnx.Mul"(%2746, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %2748 = torch.operator "onnx.Cast"(%2745) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %2749 = torch.operator "onnx.Mul"(%2748, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %2750 = torch.operator "onnx.Add"(%2747, %2749) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %2751 = torch.operator "onnx.Cast"(%2750) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %2752 = torch.operator "onnx.Shape"(%2706) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %2753 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2754 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2755 = torch.operator "onnx.Slice"(%2752, %2753, %2754) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2756 = torch.operator "onnx.Cast"(%2755) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %2757 = torch.operator "onnx.Sqrt"(%2756) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %2758 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.3_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
    // Scale factor: constant %2758 divided by %2757 (computed in f32), narrowed to bf16.
    %2759 = torch.operator "onnx.Cast"(%2757) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32>
    %2760 = torch.operator "onnx.Div"(%2758, %2759) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
    %2761 = torch.operator "onnx.Cast"(%2760) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>
    // Swap the last two axes of %2751, then multiply both matmul operands by sqrt(%2761)
    // so the product carries the full factor %2761.
    %2762 = torch.operator "onnx.Transpose"(%2751) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
    %2763 = torch.operator "onnx.Sqrt"(%2761) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
    %2764 = torch.operator "onnx.Mul"(%2706, %2763) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
    %2765 = torch.operator "onnx.Sqrt"(%2761) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
    %2766 = torch.operator "onnx.Mul"(%2762, %2765) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
    // Matmul -> softmax over the last axis -> matmul with %2661, then swap axes 1 and 2.
    // NOTE(review): presumably scaled dot-product attention (Q.K^T, softmax, .V) - confirm.
    %2767 = torch.operator "onnx.MatMul"(%2764, %2766) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
    %2768 = torch.operator "onnx.Softmax"(%2767) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
    %2769 = torch.operator "onnx.MatMul"(%2768, %2661) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
    %2770 = torch.operator "onnx.Transpose"(%2769) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
    // Scalar constant. The attribute type is spelled out as tensor<si64> (a bare `tensor`
    // does not parse); the op's result type that follows is !torch.vtensor<[],si64>.
    %2771 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_46_attr__value> : tensor<si64>} : () 
-> !torch.vtensor<[],si64>
    // Build the 3-element reshape target [%2533, constant, %2546 * %2771] and collapse the
    // rank-4 tensor %2770 to rank 3.
    %2772 = torch.operator "onnx.Mul"(%2546, %2771) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2773 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7315_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2774 = torch.operator "onnx.Unsqueeze"(%2533, %2773) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %2775 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2776 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7318_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2777 = torch.operator "onnx.Unsqueeze"(%2772, %2776) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %2778 = torch.operator "onnx.Concat"(%2774, %2775, %2777) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
    %2779 = torch.operator "onnx.Reshape"(%2770, %2778) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
    // bf16 -> bf16 cast: input and result element types are identical.
    %2780 = torch.operator "onnx.Cast"(%2779) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
    // Gather one scalar entry of Shape(%2530); it feeds the slice bounds further down.
    // The index constant is rank-0 si64, so its attribute type is written as tensor<si64>
    // (a bare `tensor` does not parse).
    %2781 = torch.operator "onnx.Shape"(%2530) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
    %2782 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_48_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %2783 = torch.operator "onnx.Gather"(%2781, %2782) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %2784 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_49_attr__value> 
: tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2785 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2786 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2787 = torch.operator "onnx.Unsqueeze"(%2783, %2786) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2788 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2789 = torch.operator "onnx.Slice"(%2780, %2785, %2787, %2784, %2788) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %2790 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2791 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2792 = torch.operator "onnx.Unsqueeze"(%2783, %2791) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2793 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2794 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2795 = torch.operator "onnx.Slice"(%2780, %2792, %2793, %2790, %2794) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %2796 = torch.operator "onnx.MatMul"(%2795, %783) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2797 = torch.operator "onnx.Add"(%86, %2796) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2798 = torch.operator "onnx.MatMul"(%2789, %784) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2799 = torch.operator "onnx.Add"(%87, %2798) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2800 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2801 = torch.operator "onnx.Unsqueeze"(%2472, %2800) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2802 = torch.operator "onnx.Mul"(%2801, %2797) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2803 = torch.operator "onnx.Add"(%2418, %2802) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.32Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.3/norm2/Constant_attr__value" : tensor<3072xbf16> %2804 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.32Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.32Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.3/norm2/Constant_1_attr__value" : tensor<3072xbf16> %2805 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.32Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %2806 = torch.operator "onnx.LayerNormalization"(%2803, %2804, %2805) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon 
= 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // Modulate the layer-norm output %2806: multiply by (unsqueezed %2478 + scalar constant),
    // then add unsqueezed %2475.
    %2807 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2808 = torch.operator "onnx.Unsqueeze"(%2478, %2807) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    // Rank-0 bf16 constant. The attribute type is spelled out as tensor<bf16> (a bare
    // `tensor` does not parse) to match the !torch.vtensor<[],bf16> result.
    %2809 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2810 = torch.operator "onnx.Add"(%2808, %2809) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %2811 = torch.operator "onnx.Mul"(%2806, %2810) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %2812 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2813 = torch.operator "onnx.Unsqueeze"(%2475, %2812) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %2814 = torch.operator "onnx.Add"(%2811, %2813) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // Projection 3072 -> 12288 (MatMul with %785 plus bias %90), then the square of the
    // result; the cube is formed by the Mul that starts on the last line.
    %2815 = torch.operator "onnx.MatMul"(%2814, %785) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2816 = torch.operator "onnx.Add"(%90, %2815) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2817 = torch.operator "onnx.Mul"(%2816, %2816) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2818 = torch.operator "onnx.Mul"(%2816, %2817) : (!torch.vtensor<[?,4096,12288],bf16>, 
!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    // Activation on x = %2816, with x^3 = %2818:
    //   c3 * x * (c2 + tanh(c1 * (x + c0 * x^3)))
    // c0..c3 are the four rank-0 bf16 constants below; their values live in dense resources
    // that are not visible here.
    // NOTE(review): this is the op structure of the tanh GELU approximation - confirm.
    // Each scalar attribute type is spelled out as tensor<bf16> (a bare `tensor` does not
    // parse) to match its !torch.vtensor<[],bf16> result.
    %2819 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2820 = torch.operator "onnx.Mul"(%2819, %2818) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2821 = torch.operator "onnx.Add"(%2816, %2820) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2822 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2823 = torch.operator "onnx.Mul"(%2822, %2821) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2824 = torch.operator "onnx.Tanh"(%2823) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2825 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2826 = torch.operator "onnx.Add"(%2825, %2824) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2827 = torch.operator "onnx.Mul"(%2816, %2826) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %2828 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2829 = torch.operator "onnx.Mul"(%2828, %2827) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    // Projection back 12288 -> 3072 with weight %786.
    %2830 = torch.operator "onnx.MatMul"(%2829, %786) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %2831 = 
torch.operator "onnx.Add"(%91, %2830) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2832 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2833 = torch.operator "onnx.Unsqueeze"(%2481, %2832) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2834 = torch.operator "onnx.Mul"(%2833, %2831) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2835 = torch.operator "onnx.Add"(%2803, %2834) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2836 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2837 = torch.operator "onnx.Unsqueeze"(%2510, %2836) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2838 = torch.operator "onnx.Mul"(%2837, %2799) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %2839 = torch.operator "onnx.Add"(%2454, %2838) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.32Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.3/norm2_context/Constant_attr__value" : tensor<3072xbf16> %2840 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.32Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.32Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.3/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %2841 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.32Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> 
!torch.vtensor<[3072],bf16>
    // Layer-normalize %2839 over the last axis with scale %2840 and bias %2841.
    %2842 = torch.operator "onnx.LayerNormalization"(%2839, %2840, %2841) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Modulate the normalized tensor: multiply by (unsqueezed %2516 + scalar constant),
    // then add unsqueezed %2513.
    %2843 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2844 = torch.operator "onnx.Unsqueeze"(%2516, %2843) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    // Rank-0 bf16 constant. The attribute type is spelled out as tensor<bf16> (a bare
    // `tensor` does not parse) to match the !torch.vtensor<[],bf16> result.
    %2845 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2846 = torch.operator "onnx.Add"(%2844, %2845) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %2847 = torch.operator "onnx.Mul"(%2842, %2846) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %2848 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %2849 = torch.operator "onnx.Unsqueeze"(%2513, %2848) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %2850 = torch.operator "onnx.Add"(%2847, %2849) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Projection 3072 -> 12288 (MatMul with %787 plus bias %92), then the element-wise square.
    %2851 = torch.operator "onnx.MatMul"(%2850, %787) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2852 = torch.operator "onnx.Add"(%92, %2851) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2853 = torch.operator "onnx.Mul"(%2852, %2852) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> 
!torch.vtensor<[?,512,12288],bf16>
    // Activation on x = %2852, with x^2 = %2853 and x^3 = %2854:
    //   c3 * x * (c2 + tanh(c1 * (x + c0 * x^3)))
    // c0..c3 are the four rank-0 bf16 constants below; their values live in dense resources
    // that are not visible here.
    // NOTE(review): this is the op structure of the tanh GELU approximation - confirm.
    // Each scalar attribute type is spelled out as tensor<bf16> (a bare `tensor` does not
    // parse) to match its !torch.vtensor<[],bf16> result.
    %2854 = torch.operator "onnx.Mul"(%2852, %2853) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2855 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2856 = torch.operator "onnx.Mul"(%2855, %2854) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2857 = torch.operator "onnx.Add"(%2852, %2856) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2858 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2859 = torch.operator "onnx.Mul"(%2858, %2857) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2860 = torch.operator "onnx.Tanh"(%2859) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2861 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2862 = torch.operator "onnx.Add"(%2861, %2860) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2863 = torch.operator "onnx.Mul"(%2852, %2862) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2864 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %2865 = torch.operator "onnx.Mul"(%2864, %2863) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %2866 = torch.operator 
"onnx.MatMul"(%2865, %788) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2867 = torch.operator "onnx.Add"(%93, %2866) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.3_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2869 = torch.operator "onnx.Unsqueeze"(%2519, %2868) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2870 = torch.operator "onnx.Mul"(%2869, %2867) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2871 = torch.operator "onnx.Add"(%2839, %2870) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2872 = torch.operator "onnx.Gemm"(%1285, %94, %95) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %2873 = torch.operator "onnx.Shape"(%2872) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %2874 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2875 = torch.operator "onnx.Gather"(%2873, %2874) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2876 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2877 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %2878 = torch.operator "onnx.Add"(%2875, %2877) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2879 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2880 = torch.operator "onnx.Div"(%2878, %2879) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2881 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2882 = torch.operator "onnx.Mul"(%2880, %2881) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2883 = torch.operator "onnx.Slice"(%2872, %2876, %2882, %2874) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2884 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2885 = torch.operator "onnx.Mul"(%2880, %2884) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2886 = torch.operator "onnx.Slice"(%2872, %2882, %2885, %2874) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2887 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2888 = torch.operator "onnx.Mul"(%2880, %2887) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2889 = torch.operator "onnx.Slice"(%2872, %2885, %2888, %2874) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?],bf16> %2890 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2891 = torch.operator "onnx.Mul"(%2880, %2890) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2892 = torch.operator "onnx.Slice"(%2872, %2888, %2891, %2874) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2893 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2894 = torch.operator "onnx.Mul"(%2880, %2893) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2895 = torch.operator "onnx.Slice"(%2872, %2891, %2894, %2874) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2896 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2897 = torch.operator "onnx.Mul"(%2880, %2896) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2898 = torch.operator "onnx.Slice"(%2872, %2894, %2897, %2874) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.42Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.4/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %2899 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.42Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.42Fnorm12Fnorm2FConstant_1_attr__value = util.global.load 
@"/transformer_blocks.4/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %2900 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.42Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %2901 = torch.operator "onnx.LayerNormalization"(%2835, %2899, %2900) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2902 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2903 = torch.operator "onnx.Unsqueeze"(%2886, %2902) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2904 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %2905 = torch.operator "onnx.Add"(%2903, %2904) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %2906 = torch.operator "onnx.Mul"(%2901, %2905) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2907 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2908 = torch.operator "onnx.Unsqueeze"(%2883, %2907) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2909 = torch.operator "onnx.Add"(%2906, %2908) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2910 = torch.operator "onnx.Gemm"(%1285, %96, %97) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, 
!torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %2911 = torch.operator "onnx.Shape"(%2910) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %2912 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2913 = torch.operator "onnx.Gather"(%2911, %2912) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2914 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2915 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2916 = torch.operator "onnx.Add"(%2913, %2915) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2917 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2918 = torch.operator "onnx.Div"(%2916, %2917) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2919 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2920 = torch.operator "onnx.Mul"(%2918, %2919) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2921 = torch.operator "onnx.Slice"(%2910, %2914, %2920, %2912) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2922 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.4_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2923 = torch.operator "onnx.Mul"(%2918, %2922) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2924 = torch.operator "onnx.Slice"(%2910, %2920, %2923, %2912) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2925 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2926 = torch.operator "onnx.Mul"(%2918, %2925) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2927 = torch.operator "onnx.Slice"(%2910, %2923, %2926, %2912) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2928 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2929 = torch.operator "onnx.Mul"(%2918, %2928) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2930 = torch.operator "onnx.Slice"(%2910, %2926, %2929, %2912) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2931 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2932 = torch.operator "onnx.Mul"(%2918, %2931) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2933 = torch.operator "onnx.Slice"(%2910, %2929, %2932, %2912) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %2934 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2935 = torch.operator "onnx.Mul"(%2918, %2934) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2936 = torch.operator "onnx.Slice"(%2910, %2932, %2935, %2912) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.42Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.4/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %2937 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.42Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.42Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.4/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %2938 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.42Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %2939 = torch.operator "onnx.LayerNormalization"(%2871, %2937, %2938) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2940 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2941 = torch.operator "onnx.Unsqueeze"(%2924, %2940) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2942 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_11_attr__value> : tensor} : 
() -> !torch.vtensor<[],bf16> %2943 = torch.operator "onnx.Add"(%2941, %2942) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %2944 = torch.operator "onnx.Mul"(%2939, %2943) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2945 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2946 = torch.operator "onnx.Unsqueeze"(%2921, %2945) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %2947 = torch.operator "onnx.Add"(%2944, %2946) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %2948 = torch.operator "onnx.Shape"(%2947) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %2949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %2950 = torch.operator "onnx.Gather"(%2948, %2949) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %2951 = torch.operator "onnx.MatMul"(%2909, %789) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2952 = torch.operator "onnx.Add"(%100, %2951) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2953 = torch.operator "onnx.MatMul"(%2909, %790) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2954 = torch.operator "onnx.Add"(%101, %2953) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2955 = torch.operator "onnx.MatMul"(%2909, %791) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> 
!torch.vtensor<[?,4096,3072],bf16> %2956 = torch.operator "onnx.Add"(%102, %2955) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %2957 = torch.operator "onnx.Shape"(%2954) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64> %2958 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %2959 = torch.operator "onnx.Gather"(%2957, %2958) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %2960 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %2961 = torch.operator "onnx.Div"(%2959, %2960) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %2962 = torch.operator "onnx.Cast"(%2961) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %2963 = torch.operator "onnx.Cast"(%2962) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %2964 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7506_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2965 = torch.operator "onnx.Unsqueeze"(%2950, %2964) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2966 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2967 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2968 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7510_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2969 = 
torch.operator "onnx.Unsqueeze"(%2963, %2968) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2970 = torch.operator "onnx.Concat"(%2965, %2966, %2967, %2969) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2971 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7513_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2972 = torch.operator "onnx.Unsqueeze"(%2950, %2971) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2973 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2974 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2975 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7517_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2976 = torch.operator "onnx.Unsqueeze"(%2963, %2975) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2977 = torch.operator "onnx.Concat"(%2972, %2973, %2974, %2976) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7520_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2979 = torch.operator "onnx.Unsqueeze"(%2950, %2978) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2980 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%2981 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2982 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7524_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2983 = torch.operator "onnx.Unsqueeze"(%2963, %2982) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2984 = torch.operator "onnx.Concat"(%2979, %2980, %2981, %2983) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7527_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2986 = torch.operator "onnx.Unsqueeze"(%2950, %2985) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2987 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2988 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2989 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7531_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2990 = torch.operator "onnx.Unsqueeze"(%2963, %2989) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2991 = torch.operator "onnx.Concat"(%2986, %2987, %2988, %2990) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2992 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7534_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %2993 = torch.operator "onnx.Unsqueeze"(%2950, %2992) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2994 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2995 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2996 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7538_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %2997 = torch.operator "onnx.Unsqueeze"(%2963, %2996) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %2998 = torch.operator "onnx.Concat"(%2993, %2994, %2995, %2997) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %2999 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7541_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3000 = torch.operator "onnx.Unsqueeze"(%2950, %2999) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3001 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3002 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3003 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7545_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3004 = torch.operator "onnx.Unsqueeze"(%2963, %3003) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3005 = 
torch.operator "onnx.Concat"(%3000, %3001, %3002, %3004) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3006 = torch.operator "onnx.Reshape"(%2952, %2970) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3007 = torch.operator "onnx.Transpose"(%3006) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3008 = torch.operator "onnx.Reshape"(%2954, %2977) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3009 = torch.operator "onnx.Transpose"(%3008) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3010 = torch.operator "onnx.Reshape"(%2956, %2984) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3011 = torch.operator "onnx.Transpose"(%3010) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3012 = torch.operator "onnx.Cast"(%3007) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3013 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3014 = torch.operator "onnx.Pow"(%3012, %3013) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %3015 = torch.operator "onnx.ReduceMean"(%3014) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3016 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.4_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3017 = torch.operator "onnx.Add"(%3015, %3016) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3018 = torch.operator "onnx.Sqrt"(%3017) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3019 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3020 = torch.operator "onnx.Div"(%3019, %3018) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3021 = torch.operator "onnx.Cast"(%3007) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3022 = torch.operator "onnx.Mul"(%3021, %3020) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %3023 = torch.operator "onnx.Cast"(%3022) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %3024 = torch.operator "onnx.Mul"(%3023, %98) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %3025 = torch.operator "onnx.Cast"(%3009) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3026 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3027 = torch.operator "onnx.Pow"(%3025, %3026) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %3028 = torch.operator "onnx.ReduceMean"(%3027) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3029 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.4_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3030 = torch.operator "onnx.Add"(%3028, %3029) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3031 = torch.operator "onnx.Sqrt"(%3030) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3032 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3033 = torch.operator "onnx.Div"(%3032, %3031) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3034 = torch.operator "onnx.Cast"(%3009) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3035 = torch.operator "onnx.Mul"(%3034, %3033) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %3036 = torch.operator "onnx.Cast"(%3035) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %3037 = torch.operator "onnx.Mul"(%3036, %99) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %3038 = torch.operator "onnx.MatMul"(%2947, %792) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3039 = torch.operator "onnx.Add"(%105, %3038) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3040 = torch.operator "onnx.MatMul"(%2947, %793) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3041 = torch.operator "onnx.Add"(%103, %3040) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3042 = torch.operator "onnx.MatMul"(%2947, %794) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> 
!torch.vtensor<[?,512,3072],bf16> %3043 = torch.operator "onnx.Add"(%104, %3042) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3044 = torch.operator "onnx.Reshape"(%3039, %2991) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3045 = torch.operator "onnx.Transpose"(%3044) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3046 = torch.operator "onnx.Reshape"(%3041, %2998) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3047 = torch.operator "onnx.Transpose"(%3046) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3048 = torch.operator "onnx.Reshape"(%3043, %3005) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3049 = torch.operator "onnx.Transpose"(%3048) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3050 = torch.operator "onnx.Cast"(%3045) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3051 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_norm_added_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3052 = torch.operator "onnx.Pow"(%3050, %3051) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %3053 = torch.operator "onnx.ReduceMean"(%3052) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3054 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.4_attn_norm_added_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3055 = torch.operator "onnx.Add"(%3053, %3054) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3056 = torch.operator "onnx.Sqrt"(%3055) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3057 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_norm_added_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3058 = torch.operator "onnx.Div"(%3057, %3056) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3059 = torch.operator "onnx.Cast"(%3045) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3060 = torch.operator "onnx.Mul"(%3059, %3058) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %3061 = torch.operator "onnx.Cast"(%3060) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %3062 = torch.operator "onnx.Mul"(%3061, %108) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %3063 = torch.operator "onnx.Cast"(%3047) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3064 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_norm_added_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3065 = torch.operator "onnx.Pow"(%3063, %3064) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %3066 = torch.operator "onnx.ReduceMean"(%3065) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3067 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.4_attn_norm_added_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3068 = torch.operator "onnx.Add"(%3066, %3067) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3069 = torch.operator "onnx.Sqrt"(%3068) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3070 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_norm_added_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %3071 = torch.operator "onnx.Div"(%3070, %3069) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %3072 = torch.operator "onnx.Cast"(%3047) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3073 = torch.operator "onnx.Mul"(%3072, %3071) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %3074 = torch.operator "onnx.Cast"(%3073) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %3075 = torch.operator "onnx.Mul"(%3074, %109) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %3076 = torch.operator "onnx.Concat"(%3062, %3024) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %3077 = torch.operator "onnx.Concat"(%3075, %3037) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %3078 = torch.operator "onnx.Concat"(%3049, %3011) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3079 = torch.operator "onnx.Shape"(%3076) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3080 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.4_attn_Constant_15_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3081 = torch.operator "onnx.Gather"(%3079, %3080) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3082 = torch.operator "onnx.Shape"(%3076) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3083 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_16_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3084 = torch.operator "onnx.Gather"(%3082, %3083) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3085 = torch.operator "onnx.Shape"(%3076) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3086 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_17_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3087 = torch.operator "onnx.Gather"(%3085, %3086) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3088 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7630_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3089 = torch.operator "onnx.Unsqueeze"(%3081, %3088) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3090 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7632_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3091 = torch.operator "onnx.Unsqueeze"(%3084, %3090) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3092 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7634_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3093 = torch.operator "onnx.Unsqueeze"(%3087, %3092) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %3094 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3095 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3096 = torch.operator "onnx.Concat"(%3089, %3091, %3093, %3094, %3095) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %3097 = torch.operator "onnx.Reshape"(%3076, %3096) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3098 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %3099:2 = torch.operator "onnx.Split"(%3097, %3098) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %3100 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3101 = torch.operator "onnx.Squeeze"(%3099#0, %3100) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3102 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3103 = torch.operator "onnx.Squeeze"(%3099#1, %3102) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3104 = torch.operator "onnx.Neg"(%3103) : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],bf16> %3105 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3106 = torch.operator "onnx.Unsqueeze"(%3104, %3105) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3107 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3108 = torch.operator "onnx.Unsqueeze"(%3101, %3107) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3109 = torch.operator "onnx.Concat"(%3106, %3108) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3110 = torch.operator "onnx.Shape"(%3109) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %3111 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3112 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3113 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3114 = torch.operator "onnx.Slice"(%3110, %3112, %3113, %3111) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %3115 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3116 = torch.operator "onnx.Concat"(%3114, %3115) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
    // NOTE: the line above is the tail of the "onnx.Concat" that defines %3116.
    //
    // Fix: rank-0 "onnx.Constant" value attributes in this span were typed as a bare
    // `tensor` (unparseable); restored to tensor<si64> to match the ops' result types.
    //
    // Collapse the pair-swapped/negated tensor %3109 back to rank 4, then combine it with the
    // original %3076 in f32: %3076 * %1455 + %3117 * %1456, cast back to bf16 (%3123).
    // NOTE(review): %1455 / %1456 look like rotary cos/sin tables - confirm at their definition.
    %3117 = torch.operator "onnx.Reshape"(%3109, %3116) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
    %3118 = torch.operator "onnx.Cast"(%3076) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
    %3119 = torch.operator "onnx.Mul"(%3118, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %3120 = torch.operator "onnx.Cast"(%3117) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %3121 = torch.operator "onnx.Mul"(%3120, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %3122 = torch.operator "onnx.Add"(%3119, %3121) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %3123 = torch.operator "onnx.Cast"(%3122) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
    // Same sequence begins for %3077: gather three entries of its shape vector (indices are
    // opaque dense_resource scalars), unsqueeze each, and concatenate with two constants
    // into a 5-element target shape.
    %3124 = torch.operator "onnx.Shape"(%3077) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %3125 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %3126 = torch.operator "onnx.Gather"(%3124, %3125) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %3127 = torch.operator "onnx.Shape"(%3077) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %3128 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %3129 = torch.operator "onnx.Gather"(%3127, %3128) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %3130 = torch.operator "onnx.Shape"(%3077) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %3131 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %3132 = torch.operator "onnx.Gather"(%3130, %3131) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %3133 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7675_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3134 = torch.operator "onnx.Unsqueeze"(%3126, %3133) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %3135 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7677_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3136 = torch.operator "onnx.Unsqueeze"(%3129, %3135) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %3137 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7679_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3138 = torch.operator "onnx.Unsqueeze"(%3132, %3137) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %3139 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3140 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3141 = torch.operator "onnx.Concat"(%3134, %3136, %3138, %3139, %3140) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>
    %3142 =
torch.operator "onnx.Reshape"(%3077, %3141) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3143 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %3144:2 = torch.operator "onnx.Split"(%3142, %3143) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %3145 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3146 = torch.operator "onnx.Squeeze"(%3144#0, %3145) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3147 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3148 = torch.operator "onnx.Squeeze"(%3144#1, %3147) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3149 = torch.operator "onnx.Neg"(%3148) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3150 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3151 = torch.operator "onnx.Unsqueeze"(%3149, %3150) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3152 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3153 = torch.operator "onnx.Unsqueeze"(%3146, %3152) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?,1],bf16> %3154 = torch.operator "onnx.Concat"(%3151, %3153) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3155 = torch.operator "onnx.Shape"(%3154) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %3156 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3157 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3158 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3159 = torch.operator "onnx.Slice"(%3155, %3157, %3158, %3156) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %3160 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3161 = torch.operator "onnx.Concat"(%3159, %3160) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3162 = torch.operator "onnx.Reshape"(%3154, %3161) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3163 = torch.operator "onnx.Cast"(%3077) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %3164 = torch.operator "onnx.Mul"(%3163, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3165 = torch.operator "onnx.Cast"(%3162) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],f32> %3166 = torch.operator "onnx.Mul"(%3165, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3167 = torch.operator "onnx.Add"(%3164, %3166) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3168 = torch.operator "onnx.Cast"(%3167) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %3169 = torch.operator "onnx.Shape"(%3123) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %3170 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3171 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3172 = torch.operator "onnx.Slice"(%3169, %3170, %3171) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3173 = torch.operator "onnx.Cast"(%3172) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %3174 = torch.operator "onnx.Sqrt"(%3173) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %3175 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %3176 = torch.operator "onnx.Cast"(%3174) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %3177 = torch.operator "onnx.Div"(%3175, %3176) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %3178 = torch.operator "onnx.Cast"(%3177) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %3179 = torch.operator "onnx.Transpose"(%3168) {torch.onnx.perm = [0 : si64, 1 : 
si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
    // NOTE: the line above is the tail of the "onnx.Transpose" that defines %3179 (from %3168).
    //
    // Fix: rank-0 "onnx.Constant" value attributes in this span were typed as a bare
    // `tensor` (unparseable); restored to tensor<si64> to match the ops' result types.
    //
    // Both matmul operands are multiplied by sqrt(%3178) (computed twice), so the product
    // %3184 carries an overall factor of %3178. Softmax is taken over the last axis and the
    // result is multiplied into %3078.
    %3180 = torch.operator "onnx.Sqrt"(%3178) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
    %3181 = torch.operator "onnx.Mul"(%3123, %3180) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
    %3182 = torch.operator "onnx.Sqrt"(%3178) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
    %3183 = torch.operator "onnx.Mul"(%3179, %3182) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
    %3184 = torch.operator "onnx.MatMul"(%3181, %3183) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
    %3185 = torch.operator "onnx.Softmax"(%3184) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
    %3186 = torch.operator "onnx.MatMul"(%3185, %3078) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
    // Swap axes 1 and 2, then flatten to rank 3 with target shape
    // [%2950, <Constant_47>, %2963 * <Constant_46>] (constant values are opaque here).
    %3187 = torch.operator "onnx.Transpose"(%3186) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
    %3188 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_46_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %3189 = torch.operator "onnx.Mul"(%2963, %3188) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %3190 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7732_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3191 = torch.operator "onnx.Unsqueeze"(%2950, %3190) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %3192 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3193 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7735_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3194 = torch.operator "onnx.Unsqueeze"(%3189, %3193) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %3195 = torch.operator "onnx.Concat"(%3191, %3192, %3194) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
    %3196 = torch.operator "onnx.Reshape"(%3187, %3195) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
    // Cast with to = 16 on an already-bf16 operand: input and result types are identical.
    %3197 = torch.operator "onnx.Cast"(%3196) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
    // One entry of %2947's shape vector (index is an opaque scalar) is used as a slice
    // boundary on %3197 by the Slice ops that follow this span.
    %3198 = torch.operator "onnx.Shape"(%2947) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
    %3199 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_48_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %3200 = torch.operator "onnx.Gather"(%3198, %3199) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %3201 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3202 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3203 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3204 = torch.operator "onnx.Unsqueeze"(%3200, %3203) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %3205 = torch.operator
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3206 = torch.operator "onnx.Slice"(%3197, %3202, %3204, %3201, %3205) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %3207 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3208 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3209 = torch.operator "onnx.Unsqueeze"(%3200, %3208) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3210 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3211 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3212 = torch.operator "onnx.Slice"(%3197, %3209, %3210, %3207, %3211) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %3213 = torch.operator "onnx.MatMul"(%3212, %795) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3214 = torch.operator "onnx.Add"(%106, %3213) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3215 = torch.operator "onnx.MatMul"(%3206, %796) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3216 = torch.operator "onnx.Add"(%107, %3215) : 
(!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3218 = torch.operator "onnx.Unsqueeze"(%2889, %3217) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3219 = torch.operator "onnx.Mul"(%3218, %3214) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3220 = torch.operator "onnx.Add"(%2835, %3219) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.42Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.4/norm2/Constant_attr__value" : tensor<3072xbf16> %3221 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.42Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.42Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.4/norm2/Constant_1_attr__value" : tensor<3072xbf16> %3222 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.42Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %3223 = torch.operator "onnx.LayerNormalization"(%3220, %3221, %3222) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3225 = torch.operator "onnx.Unsqueeze"(%2895, %3224) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3226 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.4_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    // NOTE: the line above is the tail of the "onnx.Constant" that defines %3226.
    //
    // Fix: rank-0 "onnx.Constant" value attributes in this span were typed as a bare
    // `tensor` (unparseable); restored to tensor<bf16> to match the ops' result types.
    //
    // Modulate the layer-normalized stream: %3223 * (%3225 + %3226) + unsqueeze(%2892).
    %3227 = torch.operator "onnx.Add"(%3225, %3226) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %3228 = torch.operator "onnx.Mul"(%3223, %3227) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %3229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3230 = torch.operator "onnx.Unsqueeze"(%2892, %3229) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %3231 = torch.operator "onnx.Add"(%3228, %3230) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // First projection 3072 -> 12288 with bias %110.
    %3232 = torch.operator "onnx.MatMul"(%3231, %797) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3233 = torch.operator "onnx.Add"(%110, %3232) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    // Activation, with x = %3233: c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))).
    // NOTE(review): this has the shape of tanh-approximate GELU; the four scalar constants
    // are opaque dense resources, so the exact coefficients cannot be confirmed from here.
    %3234 = torch.operator "onnx.Mul"(%3233, %3233) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3235 = torch.operator "onnx.Mul"(%3233, %3234) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3236 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3237 = torch.operator "onnx.Mul"(%3236, %3235) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3238 = torch.operator "onnx.Add"(%3233, %3237) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3239 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3240 = torch.operator "onnx.Mul"(%3239, %3238) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3241 = torch.operator "onnx.Tanh"(%3240) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3242 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3243 = torch.operator "onnx.Add"(%3242, %3241) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3244 = torch.operator "onnx.Mul"(%3233, %3243) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %3245 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3246 = torch.operator "onnx.Mul"(%3245, %3244) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    // Second projection 12288 -> 3072 with bias %111, then gate by unsqueeze(%2898)
    // (the gating Mul itself starts on the last line of this span).
    %3247 = torch.operator "onnx.MatMul"(%3246, %798) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %3248 = torch.operator "onnx.Add"(%111, %3247) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %3249 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3250 = torch.operator "onnx.Unsqueeze"(%2898, %3249) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %3251 = torch.operator "onnx.Mul"(%3250, %3248) :
(!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // NOTE: the line above is the tail of the "onnx.Mul" that defines %3251.
    //
    // Fix: rank-0 "onnx.Constant" value attributes in this span were typed as a bare
    // `tensor` (unparseable); restored to tensor<bf16> to match the ops' result types.
    //
    // Residual add for the [?,4096,3072] stream: %3220 + %3251.
    %3252 = torch.operator "onnx.Add"(%3220, %3251) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // Residual add for the [?,512,3072] stream: %2871 + unsqueeze(%2927) * %3216.
    %3253 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3254 = torch.operator "onnx.Unsqueeze"(%2927, %3253) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %3255 = torch.operator "onnx.Mul"(%3254, %3216) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
    %3256 = torch.operator "onnx.Add"(%2871, %3255) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // LayerNormalization over the last axis; scale and bias are loaded from module globals.
    %_2Ftransformer_blocks.42Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.4/norm2_context/Constant_attr__value" : tensor<3072xbf16>
    %3257 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.42Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
    %_2Ftransformer_blocks.42Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.4/norm2_context/Constant_1_attr__value" : tensor<3072xbf16>
    %3258 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.42Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
    %3259 = torch.operator "onnx.LayerNormalization"(%3256, %3257, %3258) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Modulate: %3259 * (unsqueeze(%2933) + %3262) + unsqueeze(%2930).
    %3260 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3261 = torch.operator "onnx.Unsqueeze"(%2933, %3260) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %3262 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3263 = torch.operator "onnx.Add"(%3261, %3262) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %3264 = torch.operator "onnx.Mul"(%3259, %3263) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %3265 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3266 = torch.operator "onnx.Unsqueeze"(%2930, %3265) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %3267 = torch.operator "onnx.Add"(%3264, %3266) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // First projection 3072 -> 12288 with bias %112.
    %3268 = torch.operator "onnx.MatMul"(%3267, %799) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3269 = torch.operator "onnx.Add"(%112, %3268) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    // Activation, with x = %3269: c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))).
    // NOTE(review): this has the shape of tanh-approximate GELU; the four scalar constants
    // are opaque dense resources, so the exact coefficients cannot be confirmed from here.
    %3270 = torch.operator "onnx.Mul"(%3269, %3269) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3271 = torch.operator "onnx.Mul"(%3269, %3270) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3272 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3273 = torch.operator "onnx.Mul"(%3272, %3271) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3274 = torch.operator "onnx.Add"(%3269, %3273) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3275 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3276 = torch.operator "onnx.Mul"(%3275, %3274) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3277 = torch.operator "onnx.Tanh"(%3276) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3278 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3279 = torch.operator "onnx.Add"(%3278, %3277) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3280 = torch.operator "onnx.Mul"(%3269, %3279) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %3281 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %3282 = torch.operator "onnx.Mul"(%3281, %3280) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    // Second projection 12288 -> 3072 with bias %113.
    %3283 = torch.operator "onnx.MatMul"(%3282, %800) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %3284 = torch.operator "onnx.Add"(%113, %3283) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %3285 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.4_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %3286 = torch.operator
"onnx.Unsqueeze"(%2936, %3285) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3287 = torch.operator "onnx.Mul"(%3286, %3284) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3288 = torch.operator "onnx.Add"(%3256, %3287) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3289 = torch.operator "onnx.Gemm"(%1285, %114, %115) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %3290 = torch.operator "onnx.Shape"(%3289) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %3291 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3292 = torch.operator "onnx.Gather"(%3290, %3291) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3293 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3294 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3295 = torch.operator "onnx.Add"(%3292, %3294) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3296 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3297 = torch.operator "onnx.Div"(%3295, %3296) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3298 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3299 = torch.operator "onnx.Mul"(%3297, %3298) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3300 = torch.operator "onnx.Slice"(%3289, %3293, %3299, %3291) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3301 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3302 = torch.operator "onnx.Mul"(%3297, %3301) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3303 = torch.operator "onnx.Slice"(%3289, %3299, %3302, %3291) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3304 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3305 = torch.operator "onnx.Mul"(%3297, %3304) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3306 = torch.operator "onnx.Slice"(%3289, %3302, %3305, %3291) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3307 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3308 = torch.operator "onnx.Mul"(%3297, %3307) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3309 = torch.operator "onnx.Slice"(%3289, %3305, %3308, %3291) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3310 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3311 = torch.operator "onnx.Mul"(%3297, %3310) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3312 = torch.operator "onnx.Slice"(%3289, %3308, %3311, %3291) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3313 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3314 = torch.operator "onnx.Mul"(%3297, %3313) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3315 = torch.operator "onnx.Slice"(%3289, %3311, %3314, %3291) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.52Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.5/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %3316 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.52Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.52Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.5/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %3317 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.52Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %3318 = torch.operator "onnx.LayerNormalization"(%3252, %3316, %3317) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3319 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// (The line above completes the definition of %3319, the Unsqueeze axes operand.)
//
// transformer_blocks.5 norm1, modulation tail:
//   %3326 = %3318 * (unsqueeze(%3303) + %3321) + unsqueeze(%3300)
// %3318 is the LayerNormalization result; %3300 / %3303 are slices of the Gemm output %3289.
%3320 = torch.operator "onnx.Unsqueeze"(%3303, %3319) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constant: the value type must be the rank-0 builtin tensor `tensor<bf16>`, matching the
// `!torch.vtensor<[],bf16>` result (a bare `tensor` is not a valid type).
%3321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%3322 = torch.operator "onnx.Add"(%3320, %3321) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%3323 = torch.operator "onnx.Mul"(%3318, %3322) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3324 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3325 = torch.operator "onnx.Unsqueeze"(%3300, %3324) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%3326 = torch.operator "onnx.Add"(%3323, %3325) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// transformer_blocks.5 norm1_context: Gemm (transB = 1) producing a [1,18432] tensor, followed by
// the start of the shape arithmetic that derives the slice bounds used on the following lines.
%3327 = torch.operator "onnx.Gemm"(%1285, %116, %117) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16>
%3328 = torch.operator "onnx.Shape"(%3327) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64>
%3329 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3330 = torch.operator "onnx.Gather"(%3328, %3329) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %3331 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3332 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3333 = torch.operator "onnx.Add"(%3330, %3332) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3334 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3335 = torch.operator "onnx.Div"(%3333, %3334) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3336 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3337 = torch.operator "onnx.Mul"(%3335, %3336) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3338 = torch.operator "onnx.Slice"(%3327, %3331, %3337, %3329) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3339 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3340 = torch.operator "onnx.Mul"(%3335, %3339) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3341 = torch.operator "onnx.Slice"(%3327, %3337, %3340, %3329) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3342 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.5_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3343 = torch.operator "onnx.Mul"(%3335, %3342) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3344 = torch.operator "onnx.Slice"(%3327, %3340, %3343, %3329) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3345 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3346 = torch.operator "onnx.Mul"(%3335, %3345) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3347 = torch.operator "onnx.Slice"(%3327, %3343, %3346, %3329) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3348 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3349 = torch.operator "onnx.Mul"(%3335, %3348) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3350 = torch.operator "onnx.Slice"(%3327, %3346, %3349, %3329) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3351 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3352 = torch.operator "onnx.Mul"(%3335, %3351) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3353 = torch.operator "onnx.Slice"(%3327, %3349, %3352, %3329) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// (The line above completes the result type of the Slice %3353 begun on the previous line.)
//
// transformer_blocks.5 norm1_context: LayerNormalization of %3288 over the last axis, with the
// weight/bias operands loaded from util globals, then modulation:
//   %3364 (next line) = %3356 * (unsqueeze(%3341) + %3359) + unsqueeze(%3338)
%_2Ftransformer_blocks.52Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.5/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16>
%3354 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.52Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.52Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.5/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16>
%3355 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.52Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%3356 = torch.operator "onnx.LayerNormalization"(%3288, %3354, %3355) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%3357 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3358 = torch.operator "onnx.Unsqueeze"(%3341, %3357) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constant: the value type must be `tensor<bf16>` to agree with the `!torch.vtensor<[],bf16>`
// result (a bare `tensor` is not a valid builtin type).
%3359 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%3360 = torch.operator "onnx.Add"(%3358, %3359) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%3361 = torch.operator "onnx.Mul"(%3356, %3360) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%3362 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3363 = 
torch.operator "onnx.Unsqueeze"(%3338, %3362) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// (The line above completes the definition of %3363 begun on the previous line.)
%3364 = torch.operator "onnx.Add"(%3361, %3363) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// transformer_blocks.5 attn: %3367 is one entry of shape(%3364), selected by the index held in
// %3366 (index value not visible here; presumably the leading/batch dimension - TODO confirm).
%3365 = torch.operator "onnx.Shape"(%3364) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
// Rank-0 index constant: value type is `tensor<si64>`, matching the `!torch.vtensor<[],si64>` result.
%3366 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3367 = torch.operator "onnx.Gather"(%3365, %3366) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three MatMul + bias-Add projections of the modulated [?,4096,3072] stream %3326.
%3368 = torch.operator "onnx.MatMul"(%3326, %801) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3369 = torch.operator "onnx.Add"(%120, %3368) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3370 = torch.operator "onnx.MatMul"(%3326, %802) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3371 = torch.operator "onnx.Add"(%121, %3370) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3372 = torch.operator "onnx.MatMul"(%3326, %803) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3373 = torch.operator "onnx.Add"(%122, %3372) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// One entry of shape(%3371), selected by %3375; it feeds the Div on the next line.
%3374 = torch.operator "onnx.Shape"(%3371) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
// Rank-0 index constant: value type is `tensor<si64>`, matching the `!torch.vtensor<[],si64>` result.
%3375 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3376 = torch.operator "onnx.Gather"(%3374, %3375) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// (The line above completes the Gather %3376 begun on the previous line.)
//
// transformer_blocks.5 attn: %3378 = %3376 / %3377. Both Casts target ONNX type 7 and map
// si64 -> si64, so %3380 carries the same value as %3378.
// Rank-0 divisor constant: value type is `tensor<si64>`, matching the `!torch.vtensor<[],si64>` result.
%3377 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3378 = torch.operator "onnx.Div"(%3376, %3377) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3379 = torch.operator "onnx.Cast"(%3378) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3380 = torch.operator "onnx.Cast"(%3379) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// %3387: 4-element reshape target [%3367, %3383, %3384, %3380], consumed by the Reshape of %3369.
%3381 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7923_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3382 = torch.operator "onnx.Unsqueeze"(%3367, %3381) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%3383 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3384 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3385 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7927_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3386 = torch.operator "onnx.Unsqueeze"(%3380, %3385) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%3387 = torch.operator "onnx.Concat"(%3382, %3383, %3384, %3386) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%3388 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7930_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3389 = 
torch.operator "onnx.Unsqueeze"(%3367, %3388) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3390 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3391 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3392 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7934_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3393 = torch.operator "onnx.Unsqueeze"(%3380, %3392) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3394 = torch.operator "onnx.Concat"(%3389, %3390, %3391, %3393) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3395 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7937_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3396 = torch.operator "onnx.Unsqueeze"(%3367, %3395) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3397 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3398 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3399 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7941_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3400 = torch.operator "onnx.Unsqueeze"(%3380, %3399) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3401 = torch.operator "onnx.Concat"(%3396, %3397, 
%3398, %3400) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3402 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7944_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3403 = torch.operator "onnx.Unsqueeze"(%3367, %3402) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3404 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3405 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3406 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7948_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3407 = torch.operator "onnx.Unsqueeze"(%3380, %3406) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3408 = torch.operator "onnx.Concat"(%3403, %3404, %3405, %3407) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7951_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3410 = torch.operator "onnx.Unsqueeze"(%3367, %3409) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3411 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3412 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%3413 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7955_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3414 = torch.operator "onnx.Unsqueeze"(%3380, %3413) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3415 = torch.operator "onnx.Concat"(%3410, %3411, %3412, %3414) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3416 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7958_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3417 = torch.operator "onnx.Unsqueeze"(%3367, %3416) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3418 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3420 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_7962_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3421 = torch.operator "onnx.Unsqueeze"(%3380, %3420) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3422 = torch.operator "onnx.Concat"(%3417, %3418, %3419, %3421) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3423 = torch.operator "onnx.Reshape"(%3369, %3387) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3424 = torch.operator "onnx.Transpose"(%3423) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// (The line above completes the type signature of the Transpose %3424 begun on the previous line.)
//
// Reshape to 4-D and swap axes 1 and 2 (perm = [0, 2, 1, 3]) for the remaining two projections.
%3425 = torch.operator "onnx.Reshape"(%3371, %3394) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%3426 = torch.operator "onnx.Transpose"(%3425) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%3427 = torch.operator "onnx.Reshape"(%3373, %3401) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%3428 = torch.operator "onnx.Transpose"(%3427) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// transformer_blocks.5 attn norm_q, computed in f32 on %3424:
//   scale = %3436 / sqrt(mean(pow(x, %3430), axis = -1, keepdims) + %3433)
// The three scalars are rank-0 f32 constants, so their value type is `tensor<f32>`
// (matching the `!torch.vtensor<[],f32>` results).
%3429 = torch.operator "onnx.Cast"(%3424) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3430 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3431 = torch.operator "onnx.Pow"(%3429, %3430) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3432 = torch.operator "onnx.ReduceMean"(%3431) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3433 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3434 = torch.operator "onnx.Add"(%3432, %3433) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3435 = torch.operator "onnx.Sqrt"(%3434) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3436 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> 
!torch.vtensor<[],f32>
// (The line above completes the result type of the Constant %3436 begun on the previous line.)
//
// norm_q tail: scale %3424 (as f32) by %3437, cast back to bf16 (ONNX type 16), then multiply
// by the [128] weight %118.
%3437 = torch.operator "onnx.Div"(%3436, %3435) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3438 = torch.operator "onnx.Cast"(%3424) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3439 = torch.operator "onnx.Mul"(%3438, %3437) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3440 = torch.operator "onnx.Cast"(%3439) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%3441 = torch.operator "onnx.Mul"(%3440, %118) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// transformer_blocks.5 attn norm_k, same op sequence applied to %3426:
//   scale = %3449 / sqrt(mean(pow(x, %3443), axis = -1, keepdims) + %3446)
// The three scalars are rank-0 f32 constants, so their value type is `tensor<f32>`
// (matching the `!torch.vtensor<[],f32>` results).
%3442 = torch.operator "onnx.Cast"(%3426) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3443 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3444 = torch.operator "onnx.Pow"(%3442, %3443) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3445 = torch.operator "onnx.ReduceMean"(%3444) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3446 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3447 = torch.operator "onnx.Add"(%3445, %3446) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3448 = torch.operator "onnx.Sqrt"(%3447) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3449 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3450 = torch.operator "onnx.Div"(%3449, 
%3448) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// (The line above completes the Div %3450 begun on the previous line.)
//
// norm_k tail: scale %3426 (as f32) by %3450, cast back to bf16 (ONNX type 16), then multiply
// by the [128] weight %119.
%3451 = torch.operator "onnx.Cast"(%3426) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3452 = torch.operator "onnx.Mul"(%3451, %3450) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3453 = torch.operator "onnx.Cast"(%3452) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%3454 = torch.operator "onnx.Mul"(%3453, %119) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Three MatMul + bias-Add projections of the modulated [?,512,3072] stream %3364.
%3455 = torch.operator "onnx.MatMul"(%3364, %804) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%3456 = torch.operator "onnx.Add"(%125, %3455) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%3457 = torch.operator "onnx.MatMul"(%3364, %805) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%3458 = torch.operator "onnx.Add"(%123, %3457) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%3459 = torch.operator "onnx.MatMul"(%3364, %806) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%3460 = torch.operator "onnx.Add"(%124, %3459) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Reshape each projection to 4-D and swap axes 1 and 2 (perm = [0, 2, 1, 3]).
%3461 = torch.operator "onnx.Reshape"(%3456, %3408) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%3462 = torch.operator "onnx.Transpose"(%3461) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%3463 = torch.operator "onnx.Reshape"(%3458, %3415) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%3464 = torch.operator "onnx.Transpose"(%3463) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%3465 = torch.operator "onnx.Reshape"(%3460, %3422) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%3466 = torch.operator "onnx.Transpose"(%3465) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// transformer_blocks.5 attn norm_added_q, computed in f32 on %3462:
//   scale = %3474 / sqrt(mean(pow(x, %3468), axis = -1, keepdims) + %3471)
// The three scalars are rank-0 f32 constants, so their value type is `tensor<f32>`
// (matching the `!torch.vtensor<[],f32>` results).
%3467 = torch.operator "onnx.Cast"(%3462) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3469 = torch.operator "onnx.Pow"(%3467, %3468) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3470 = torch.operator "onnx.ReduceMean"(%3469) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3471 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3472 = torch.operator "onnx.Add"(%3470, %3471) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3473 = torch.operator "onnx.Sqrt"(%3472) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3474 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3475 = torch.operator "onnx.Div"(%3474, %3473) : 
(!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// (The line above completes the type signature of the Div %3475 begun on the previous line.)
//
// norm_added_q tail: scale %3462 (as f32) by %3475, cast back to bf16 (ONNX type 16), then
// multiply by the [128] weight %128.
%3476 = torch.operator "onnx.Cast"(%3462) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3477 = torch.operator "onnx.Mul"(%3476, %3475) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3478 = torch.operator "onnx.Cast"(%3477) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%3479 = torch.operator "onnx.Mul"(%3478, %128) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// transformer_blocks.5 attn norm_added_k, same op sequence applied to %3464:
//   scale = %3487 / sqrt(mean(pow(x, %3481), axis = -1, keepdims) + %3484)
// The three scalars are rank-0 f32 constants, so their value type is `tensor<f32>`
// (matching the `!torch.vtensor<[],f32>` results).
%3480 = torch.operator "onnx.Cast"(%3464) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3481 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3482 = torch.operator "onnx.Pow"(%3480, %3481) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3483 = torch.operator "onnx.ReduceMean"(%3482) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3484 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3485 = torch.operator "onnx.Add"(%3483, %3484) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3486 = torch.operator "onnx.Sqrt"(%3485) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3488 = torch.operator "onnx.Div"(%3487, %3486) : (!torch.vtensor<[],f32>, 
!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// (The line above completes the type signature of the Div %3488 begun on the previous line.)
//
// norm_added_k tail: scale %3464 (as f32) by %3488, cast back to bf16 (ONNX type 16), then
// multiply by the [128] weight %129.
%3489 = torch.operator "onnx.Cast"(%3464) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3490 = torch.operator "onnx.Mul"(%3489, %3488) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3491 = torch.operator "onnx.Cast"(%3490) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%3492 = torch.operator "onnx.Mul"(%3491, %129) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenate along axis 2, with the [?,512,3072]-derived tensor first and the
// [?,4096,3072]-derived tensor second in each pair.
%3493 = torch.operator "onnx.Concat"(%3479, %3441) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%3494 = torch.operator "onnx.Concat"(%3492, %3454) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%3495 = torch.operator "onnx.Concat"(%3466, %3428) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Individual entries of shape(%3493), each selected by a rank-0 index constant. Those constants
// have value type `tensor<si64>`, matching their `!torch.vtensor<[],si64>` results.
%3496 = torch.operator "onnx.Shape"(%3493) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%3497 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3498 = torch.operator "onnx.Gather"(%3496, %3497) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3499 = torch.operator "onnx.Shape"(%3493) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%3500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3501 = torch.operator "onnx.Gather"(%3499, %3500) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// (The line above completes the type signature of the Gather %3501 begun on the previous line.)
//
// Third entry taken from shape(%3493). The rank-0 index constant has value type `tensor<si64>`,
// matching its `!torch.vtensor<[],si64>` result.
%3502 = torch.operator "onnx.Shape"(%3493) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%3503 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3504 = torch.operator "onnx.Gather"(%3502, %3503) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Build the 5-element reshape target [%3498, %3501, %3504, %3511, %3512] that the next line
// uses to view %3493 as a [?,?,?,?,2] tensor.
%3505 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8047_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3506 = torch.operator "onnx.Unsqueeze"(%3498, %3505) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%3507 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8049_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3508 = torch.operator "onnx.Unsqueeze"(%3501, %3507) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%3509 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8051_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3510 = torch.operator "onnx.Unsqueeze"(%3504, %3509) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%3511 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3512 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3513 = torch.operator "onnx.Concat"(%3506, %3508, %3510, %3511, %3512) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[5],si64> %3514 = torch.operator "onnx.Reshape"(%3493, %3513) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3515 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %3516:2 = torch.operator "onnx.Split"(%3514, %3515) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %3517 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3518 = torch.operator "onnx.Squeeze"(%3516#0, %3517) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3519 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3520 = torch.operator "onnx.Squeeze"(%3516#1, %3519) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3521 = torch.operator "onnx.Neg"(%3520) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3522 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3523 = torch.operator "onnx.Unsqueeze"(%3521, %3522) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3524 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3525 = torch.operator "onnx.Unsqueeze"(%3518, %3524) : (!torch.vtensor<[?,?,?,?],bf16>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3526 = torch.operator "onnx.Concat"(%3523, %3525) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3527 = torch.operator "onnx.Shape"(%3526) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %3528 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3529 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3530 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3531 = torch.operator "onnx.Slice"(%3527, %3529, %3530, %3528) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %3532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3533 = torch.operator "onnx.Concat"(%3531, %3532) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3534 = torch.operator "onnx.Reshape"(%3526, %3533) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3535 = torch.operator "onnx.Cast"(%3493) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %3536 = torch.operator "onnx.Mul"(%3535, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3537 = torch.operator "onnx.Cast"(%3534) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3538 = torch.operator "onnx.Mul"(%3537, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3539 = torch.operator "onnx.Add"(%3536, %3538) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3540 = torch.operator "onnx.Cast"(%3539) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %3541 = torch.operator "onnx.Shape"(%3494) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3542 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_29_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3543 = torch.operator "onnx.Gather"(%3541, %3542) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3544 = torch.operator "onnx.Shape"(%3494) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3545 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_30_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3546 = torch.operator "onnx.Gather"(%3544, %3545) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3547 = torch.operator "onnx.Shape"(%3494) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3548 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_31_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3549 = torch.operator "onnx.Gather"(%3547, %3548) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3550 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8092_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %3551 = torch.operator "onnx.Unsqueeze"(%3543, %3550) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3552 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8094_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3553 = torch.operator "onnx.Unsqueeze"(%3546, %3552) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3554 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8096_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3555 = torch.operator "onnx.Unsqueeze"(%3549, %3554) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3556 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3557 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3558 = torch.operator "onnx.Concat"(%3551, %3553, %3555, %3556, %3557) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %3559 = torch.operator "onnx.Reshape"(%3494, %3558) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3560 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %3561:2 = torch.operator "onnx.Split"(%3559, %3560) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %3562 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3563 = torch.operator "onnx.Squeeze"(%3561#0, %3562) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3564 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3565 = torch.operator "onnx.Squeeze"(%3561#1, %3564) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3566 = torch.operator "onnx.Neg"(%3565) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3567 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3568 = torch.operator "onnx.Unsqueeze"(%3566, %3567) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3569 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3570 = torch.operator "onnx.Unsqueeze"(%3563, %3569) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3571 = torch.operator "onnx.Concat"(%3568, %3570) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3572 = torch.operator "onnx.Shape"(%3571) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %3573 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3574 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_40_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3575 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3576 = torch.operator "onnx.Slice"(%3572, %3574, %3575, %3573) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %3577 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3578 = torch.operator "onnx.Concat"(%3576, %3577) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3579 = torch.operator "onnx.Reshape"(%3571, %3578) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3580 = torch.operator "onnx.Cast"(%3494) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %3581 = torch.operator "onnx.Mul"(%3580, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3582 = torch.operator "onnx.Cast"(%3579) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3583 = torch.operator "onnx.Mul"(%3582, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3584 = torch.operator "onnx.Add"(%3581, %3583) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3585 = torch.operator "onnx.Cast"(%3584) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %3586 = torch.operator "onnx.Shape"(%3540) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %3587 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3588 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3589 = torch.operator "onnx.Slice"(%3586, %3587, %3588) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3590 = torch.operator "onnx.Cast"(%3589) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %3591 = torch.operator "onnx.Sqrt"(%3590) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %3592 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %3593 = torch.operator "onnx.Cast"(%3591) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %3594 = torch.operator "onnx.Div"(%3592, %3593) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %3595 = torch.operator "onnx.Cast"(%3594) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %3596 = torch.operator "onnx.Transpose"(%3585) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %3597 = torch.operator "onnx.Sqrt"(%3595) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %3598 = torch.operator "onnx.Mul"(%3540, %3597) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %3599 = torch.operator "onnx.Sqrt"(%3595) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %3600 = torch.operator "onnx.Mul"(%3596, %3599) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %3601 = torch.operator 
"onnx.MatMul"(%3598, %3600) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %3602 = torch.operator "onnx.Softmax"(%3601) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %3603 = torch.operator "onnx.MatMul"(%3602, %3495) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %3604 = torch.operator "onnx.Transpose"(%3603) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %3605 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_46_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3606 = torch.operator "onnx.Mul"(%3380, %3605) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3607 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8149_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3608 = torch.operator "onnx.Unsqueeze"(%3367, %3607) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3609 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3610 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8152_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3611 = torch.operator "onnx.Unsqueeze"(%3606, %3610) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3612 = torch.operator "onnx.Concat"(%3608, %3609, %3611) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %3613 = torch.operator "onnx.Reshape"(%3604, %3612) {torch.onnx.allowzero = 0 : si64} : 
(!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %3614 = torch.operator "onnx.Cast"(%3613) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %3615 = torch.operator "onnx.Shape"(%3364) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %3616 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3617 = torch.operator "onnx.Gather"(%3615, %3616) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3618 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3619 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3620 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3621 = torch.operator "onnx.Unsqueeze"(%3617, %3620) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3622 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3623 = torch.operator "onnx.Slice"(%3614, %3619, %3621, %3618, %3622) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %3624 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3625 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3626 = torch.operator "onnx.Unsqueeze"(%3617, %3625) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3627 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3628 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3629 = torch.operator "onnx.Slice"(%3614, %3626, %3627, %3624, %3628) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %3630 = torch.operator "onnx.MatMul"(%3629, %807) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3631 = torch.operator "onnx.Add"(%126, %3630) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3632 = torch.operator "onnx.MatMul"(%3623, %808) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3633 = torch.operator "onnx.Add"(%127, %3632) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3634 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3635 = torch.operator "onnx.Unsqueeze"(%3306, %3634) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3636 = torch.operator "onnx.Mul"(%3635, %3631) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %3637 = torch.operator "onnx.Add"(%3252, %3636) : 
(!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.52Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.5/norm2/Constant_attr__value" : tensor<3072xbf16> %3638 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.52Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.52Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.5/norm2/Constant_1_attr__value" : tensor<3072xbf16> %3639 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.52Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %3640 = torch.operator "onnx.LayerNormalization"(%3637, %3638, %3639) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3641 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3642 = torch.operator "onnx.Unsqueeze"(%3312, %3641) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3643 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3644 = torch.operator "onnx.Add"(%3642, %3643) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %3645 = torch.operator "onnx.Mul"(%3640, %3644) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3646 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3647 = torch.operator "onnx.Unsqueeze"(%3309, %3646) : (!torch.vtensor<[?,?],bf16>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3648 = torch.operator "onnx.Add"(%3645, %3647) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3649 = torch.operator "onnx.MatMul"(%3648, %809) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3650 = torch.operator "onnx.Add"(%130, %3649) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3651 = torch.operator "onnx.Mul"(%3650, %3650) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3652 = torch.operator "onnx.Mul"(%3650, %3651) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3653 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_ff_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3654 = torch.operator "onnx.Mul"(%3653, %3652) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3655 = torch.operator "onnx.Add"(%3650, %3654) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3656 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3657 = torch.operator "onnx.Mul"(%3656, %3655) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3658 = torch.operator "onnx.Tanh"(%3657) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3659 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3660 = torch.operator 
"onnx.Add"(%3659, %3658) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3661 = torch.operator "onnx.Mul"(%3650, %3660) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3662 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3663 = torch.operator "onnx.Mul"(%3662, %3661) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %3664 = torch.operator "onnx.MatMul"(%3663, %810) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3665 = torch.operator "onnx.Add"(%131, %3664) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3667 = torch.operator "onnx.Unsqueeze"(%3315, %3666) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3668 = torch.operator "onnx.Mul"(%3667, %3665) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3669 = torch.operator "onnx.Add"(%3637, %3668) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3670 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3671 = torch.operator "onnx.Unsqueeze"(%3344, %3670) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3672 = torch.operator "onnx.Mul"(%3671, %3633) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) 
-> !torch.vtensor<[?,?,3072],bf16> %3673 = torch.operator "onnx.Add"(%3288, %3672) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.52Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.5/norm2_context/Constant_attr__value" : tensor<3072xbf16> %3674 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.52Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.52Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.5/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %3675 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.52Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %3676 = torch.operator "onnx.LayerNormalization"(%3673, %3674, %3675) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3677 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3678 = torch.operator "onnx.Unsqueeze"(%3350, %3677) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3679 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3680 = torch.operator "onnx.Add"(%3678, %3679) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %3681 = torch.operator "onnx.Mul"(%3676, %3680) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3682 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_8_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3683 = torch.operator "onnx.Unsqueeze"(%3347, %3682) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3684 = torch.operator "onnx.Add"(%3681, %3683) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3685 = torch.operator "onnx.MatMul"(%3684, %811) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3686 = torch.operator "onnx.Add"(%132, %3685) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3687 = torch.operator "onnx.Mul"(%3686, %3686) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3688 = torch.operator "onnx.Mul"(%3686, %3687) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3689 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3690 = torch.operator "onnx.Mul"(%3689, %3688) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3691 = torch.operator "onnx.Add"(%3686, %3690) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3692 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3693 = torch.operator "onnx.Mul"(%3692, %3691) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3694 = torch.operator "onnx.Tanh"(%3693) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3695 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.5_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3696 = torch.operator "onnx.Add"(%3695, %3694) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3697 = torch.operator "onnx.Mul"(%3686, %3696) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %3699 = torch.operator "onnx.Mul"(%3698, %3697) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %3700 = torch.operator "onnx.MatMul"(%3699, %812) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3701 = torch.operator "onnx.Add"(%133, %3700) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.5_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3703 = torch.operator "onnx.Unsqueeze"(%3353, %3702) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3704 = torch.operator "onnx.Mul"(%3703, %3701) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3705 = torch.operator "onnx.Add"(%3673, %3704) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3706 = torch.operator "onnx.Gemm"(%1285, %134, %135) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> 
// transformer_blocks.6 / norm1: start of the slicing of %3706 (the [1,18432] bf16 Gemm result)
// into consecutive pieces.
//   %3707..%3714 : Shape(%3706) -> Gather(index %3708, axis 0) -> Add(%3711) -> Div(%3713),
//                  giving the [1] si64 value %3714 that every slice bound is derived from.
//   %3716, %3719, ... : %3714 multiplied by a per-slice constant; each product is used as the
//                  `ends` operand of one Slice and as the `starts` operand of the next one
//                  (the first Slice takes its start from the constant %3710).
//   %3708 is used twice: as the Gather index and as the `axes` operand of every Slice.
// NOTE(review): the constant values live in dense_resource blobs that are not visible here.
// Presumably this is the exporter's expansion of a 6-way chunk along dim 1
// (18432 / 6 = 3072 per piece) -- confirm against the resource data before relying on it.
%3707 = torch.operator "onnx.Shape"(%3706) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %3708 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3709 = torch.operator "onnx.Gather"(%3707, %3708) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3710 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3711 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3712 = torch.operator "onnx.Add"(%3709, %3711) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3713 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3714 = torch.operator "onnx.Div"(%3712, %3713) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3715 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3716 = torch.operator "onnx.Mul"(%3714, %3715) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3717 = torch.operator "onnx.Slice"(%3706, %3710, %3716, %3708) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3718 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3719 = torch.operator "onnx.Mul"(%3714, %3718) : 
(!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3720 = torch.operator "onnx.Slice"(%3706, %3716, %3719, %3708) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3721 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3722 = torch.operator "onnx.Mul"(%3714, %3721) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3723 = torch.operator "onnx.Slice"(%3706, %3719, %3722, %3708) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3724 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3725 = torch.operator "onnx.Mul"(%3714, %3724) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3726 = torch.operator "onnx.Slice"(%3706, %3722, %3725, %3708) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3727 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3728 = torch.operator "onnx.Mul"(%3714, %3727) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3729 = torch.operator "onnx.Slice"(%3706, %3725, %3728, %3708) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3730 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64>
// ^ The fragment above is the result type of an op that starts on the preceding source line.
// Slice %3706 from %3728 to %3731 (= %3714 * %3730) along the axes held in %3708.
%3731 = torch.operator "onnx.Mul"(%3714, %3730) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%3732 = torch.operator "onnx.Slice"(%3706, %3728, %3731, %3708) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Load the two [3072] bf16 globals that feed the second and third operands of the
// LayerNormalization over %3669 (axis = -1).
%_2Ftransformer_blocks.62Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.6/norm1/norm/Constant_attr__value" : tensor<3072xbf16>
%3733 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.62Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.62Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.6/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
%3734 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.62Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%3735 = torch.operator "onnx.LayerNormalization"(%3669, %3733, %3734) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Unsqueeze %3720 along the axes in %3736, add the scalar constant %3738, and multiply the
// normalized tensor %3735 by the sum. The scalar payload is typed tensor<bf16> to match its result type.
%3736 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3737 = torch.operator "onnx.Unsqueeze"(%3720, %3736) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%3738 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%3739 = torch.operator "onnx.Add"(%3737, %3738) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%3740 = torch.operator "onnx.Mul"(%3735, %3739) : (!torch.vtensor<[?,4096,3072],bf16>,
!torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3741 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3742 = torch.operator "onnx.Unsqueeze"(%3717, %3741) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %3743 = torch.operator "onnx.Add"(%3740, %3742) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %3744 = torch.operator "onnx.Gemm"(%1285, %136, %137) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %3745 = torch.operator "onnx.Shape"(%3744) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %3746 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3747 = torch.operator "onnx.Gather"(%3745, %3746) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3748 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3749 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3750 = torch.operator "onnx.Add"(%3747, %3749) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3751 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3752 = 
torch.operator "onnx.Div"(%3750, %3751) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3753 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3754 = torch.operator "onnx.Mul"(%3752, %3753) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3755 = torch.operator "onnx.Slice"(%3744, %3748, %3754, %3746) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3756 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3757 = torch.operator "onnx.Mul"(%3752, %3756) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3758 = torch.operator "onnx.Slice"(%3744, %3754, %3757, %3746) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3759 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3760 = torch.operator "onnx.Mul"(%3752, %3759) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3761 = torch.operator "onnx.Slice"(%3744, %3757, %3760, %3746) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3762 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3763 = torch.operator "onnx.Mul"(%3752, %3762) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %3764 = torch.operator "onnx.Slice"(%3744, %3760, %3763, %3746) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3765 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3766 = torch.operator "onnx.Mul"(%3752, %3765) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3767 = torch.operator "onnx.Slice"(%3744, %3763, %3766, %3746) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %3768 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3769 = torch.operator "onnx.Mul"(%3752, %3768) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3770 = torch.operator "onnx.Slice"(%3744, %3766, %3769, %3746) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.62Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.6/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %3771 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.62Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.62Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.6/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %3772 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.62Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %3773 = torch.operator "onnx.LayerNormalization"(%3705, 
%3771, %3772) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// ^ The fragment above finishes the op that defines %3773, which starts on the preceding source line.
// %3781 = %3773 * (unsqueeze(%3758) + scalar %3776) + unsqueeze(%3755).
// The scalar payload of %3776 is typed tensor<bf16> to match its !torch.vtensor<[],bf16> result.
%3774 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3775 = torch.operator "onnx.Unsqueeze"(%3758, %3774) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%3776 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%3777 = torch.operator "onnx.Add"(%3775, %3776) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%3778 = torch.operator "onnx.Mul"(%3773, %3777) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%3779 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3780 = torch.operator "onnx.Unsqueeze"(%3755, %3779) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%3781 = torch.operator "onnx.Add"(%3778, %3780) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Read one entry of %3781's shape as a scalar (%3784); the index comes from the scalar si64 constant %3783.
%3782 = torch.operator "onnx.Shape"(%3781) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%3783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3784 = torch.operator "onnx.Gather"(%3782, %3783) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three [3072,3072] MatMuls of %3743 (%813, %814, %815), each followed by a [3072] addend
// (%140, %141, %142), giving %3786, %3788 and %3790.
%3785 = torch.operator "onnx.MatMul"(%3743, %813) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3786 = torch.operator "onnx.Add"(%140, %3785) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3787 = torch.operator "onnx.MatMul"(%3743, %814) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3788 = torch.operator "onnx.Add"(%141, %3787) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3789 = torch.operator "onnx.MatMul"(%3743, %815) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%3790 = torch.operator "onnx.Add"(%142, %3789) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// %3795 = (entry %3792 of %3788's shape) / %3794. Both Casts use torch.onnx.to = 7 and map si64 to si64.
%3791 = torch.operator "onnx.Shape"(%3788) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
%3792 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3793 = torch.operator "onnx.Gather"(%3791, %3792) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3794 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3795 = torch.operator "onnx.Div"(%3793, %3794) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3796 = torch.operator "onnx.Cast"(%3795) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3797 = torch.operator "onnx.Cast"(%3796) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3798 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8340_attr__value> : tensor<1xsi64>}
: () -> !torch.vtensor<[1],si64> %3799 = torch.operator "onnx.Unsqueeze"(%3784, %3798) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3800 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3801 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3802 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8344_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3803 = torch.operator "onnx.Unsqueeze"(%3797, %3802) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3804 = torch.operator "onnx.Concat"(%3799, %3800, %3801, %3803) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3805 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8347_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3806 = torch.operator "onnx.Unsqueeze"(%3784, %3805) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3807 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3808 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3809 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8351_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3810 = torch.operator "onnx.Unsqueeze"(%3797, %3809) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3811 = 
torch.operator "onnx.Concat"(%3806, %3807, %3808, %3810) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3812 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8354_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3813 = torch.operator "onnx.Unsqueeze"(%3784, %3812) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3814 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3815 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3816 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8358_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3817 = torch.operator "onnx.Unsqueeze"(%3797, %3816) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3818 = torch.operator "onnx.Concat"(%3813, %3814, %3815, %3817) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3819 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8361_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3820 = torch.operator "onnx.Unsqueeze"(%3784, %3819) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3821 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3822 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_10_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3823 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8365_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3824 = torch.operator "onnx.Unsqueeze"(%3797, %3823) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3825 = torch.operator "onnx.Concat"(%3820, %3821, %3822, %3824) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3826 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8368_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3827 = torch.operator "onnx.Unsqueeze"(%3784, %3826) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3828 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3829 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3830 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8372_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3831 = torch.operator "onnx.Unsqueeze"(%3797, %3830) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3832 = torch.operator "onnx.Concat"(%3827, %3828, %3829, %3831) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3833 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8375_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3834 = torch.operator "onnx.Unsqueeze"(%3784, %3833) : (!torch.vtensor<[],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3835 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3836 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3837 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8379_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3838 = torch.operator "onnx.Unsqueeze"(%3797, %3837) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3839 = torch.operator "onnx.Concat"(%3834, %3835, %3836, %3838) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3840 = torch.operator "onnx.Reshape"(%3786, %3804) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3841 = torch.operator "onnx.Transpose"(%3840) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3842 = torch.operator "onnx.Reshape"(%3788, %3811) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3843 = torch.operator "onnx.Transpose"(%3842) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3844 = torch.operator "onnx.Reshape"(%3790, %3818) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3845 = torch.operator "onnx.Transpose"(%3844) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],bf16>
// ^ The fragment above is the result type of an op that starts on the preceding source line.
// Normalization of %3841, computed in f32:
//   %3856 = cast(%3841) * (%3853 / sqrt(mean(pow(cast(%3841), %3847), last axis, keepdims) + %3850))
// The result is cast back to bf16 (torch.onnx.to = 16) and multiplied by the [128] vector %138.
// The three scalar payloads are typed tensor<f32> to match their !torch.vtensor<[],f32> results.
%3846 = torch.operator "onnx.Cast"(%3841) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3848 = torch.operator "onnx.Pow"(%3846, %3847) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3849 = torch.operator "onnx.ReduceMean"(%3848) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3850 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3851 = torch.operator "onnx.Add"(%3849, %3850) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3852 = torch.operator "onnx.Sqrt"(%3851) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3853 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3854 = torch.operator "onnx.Div"(%3853, %3852) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3855 = torch.operator "onnx.Cast"(%3841) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3856 = torch.operator "onnx.Mul"(%3855, %3854) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3857 = torch.operator "onnx.Cast"(%3856) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%3858 = torch.operator "onnx.Mul"(%3857, %138) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// The same op sequence applied to %3843, with constants %3860/%3863/%3866 and the [128] vector %139.
%3859 = torch.operator "onnx.Cast"(%3843) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3860 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3861 = torch.operator "onnx.Pow"(%3859, %3860) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3862 = torch.operator "onnx.ReduceMean"(%3861) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3863 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3864 = torch.operator "onnx.Add"(%3862, %3863) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3865 = torch.operator "onnx.Sqrt"(%3864) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3866 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3867 = torch.operator "onnx.Div"(%3866, %3865) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3868 = torch.operator "onnx.Cast"(%3843) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3869 = torch.operator "onnx.Mul"(%3868, %3867) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3870 = torch.operator "onnx.Cast"(%3869) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%3871 = torch.operator "onnx.Mul"(%3870, %139) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%3872 = torch.operator "onnx.MatMul"(%3781, %816) : (!torch.vtensor<[?,512,3072],bf16>,
!torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3873 = torch.operator "onnx.Add"(%145, %3872) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3874 = torch.operator "onnx.MatMul"(%3781, %817) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3875 = torch.operator "onnx.Add"(%143, %3874) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3876 = torch.operator "onnx.MatMul"(%3781, %818) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3877 = torch.operator "onnx.Add"(%144, %3876) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %3878 = torch.operator "onnx.Reshape"(%3873, %3825) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3879 = torch.operator "onnx.Transpose"(%3878) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3880 = torch.operator "onnx.Reshape"(%3875, %3832) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3881 = torch.operator "onnx.Transpose"(%3880) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3882 = torch.operator "onnx.Reshape"(%3877, %3839) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3883 = torch.operator "onnx.Transpose"(%3882) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3884 = torch.operator "onnx.Cast"(%3879) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// ^ The fragment above finishes the op that defines %3884, which starts on the preceding source line.
// Normalization of %3879, computed in f32:
//   %3894 = cast(%3879) * (%3891 / sqrt(mean(pow(%3884, %3885), last axis, keepdims) + %3888))
// The result is cast back to bf16 (torch.onnx.to = 16) and multiplied by the [128] vector %148.
// The scalar payloads are typed tensor<f32> to match their !torch.vtensor<[],f32> results.
%3885 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3886 = torch.operator "onnx.Pow"(%3884, %3885) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3887 = torch.operator "onnx.ReduceMean"(%3886) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3888 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3889 = torch.operator "onnx.Add"(%3887, %3888) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3890 = torch.operator "onnx.Sqrt"(%3889) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3891 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3892 = torch.operator "onnx.Div"(%3891, %3890) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3893 = torch.operator "onnx.Cast"(%3879) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3894 = torch.operator "onnx.Mul"(%3893, %3892) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3895 = torch.operator "onnx.Cast"(%3894) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%3896 = torch.operator "onnx.Mul"(%3895, %148) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// The same op sequence applied to %3881, with constants %3898/%3901/%3904 and the [128] vector %149.
%3897 = torch.operator "onnx.Cast"(%3881) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3898 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3899 = torch.operator "onnx.Pow"(%3897, %3898) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3900 = torch.operator "onnx.ReduceMean"(%3899) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3901 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3902 = torch.operator "onnx.Add"(%3900, %3901) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3903 = torch.operator "onnx.Sqrt"(%3902) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3904 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%3905 = torch.operator "onnx.Div"(%3904, %3903) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%3906 = torch.operator "onnx.Cast"(%3881) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%3907 = torch.operator "onnx.Mul"(%3906, %3905) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%3908 = torch.operator "onnx.Cast"(%3907) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%3909 = torch.operator "onnx.Mul"(%3908, %149) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenate on axis 2, placing the %148-/%149-scaled tensors ahead of the %138-/%139-scaled ones.
// %3912 joins %3883 and %3845 in the same order.
%3910 = torch.operator "onnx.Concat"(%3896, %3858) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%3911 = torch.operator "onnx.Concat"(%3909, %3871) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%3912 = torch.operator "onnx.Concat"(%3883, %3845) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Read three entries of %3910's shape as scalars (%3915, %3918, %3921). The scalar index payloads
// are typed tensor<si64> to match their !torch.vtensor<[],si64> results.
%3913 = torch.operator "onnx.Shape"(%3910) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%3914 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3915 = torch.operator "onnx.Gather"(%3913, %3914) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3916 = torch.operator "onnx.Shape"(%3910) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%3917 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3918 = torch.operator "onnx.Gather"(%3916, %3917) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3919 = torch.operator "onnx.Shape"(%3910) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%3920 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%3921 = torch.operator "onnx.Gather"(%3919, %3920) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%3922 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8464_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%3923 = torch.operator "onnx.Unsqueeze"(%3915, %3922) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) ->
!torch.vtensor<[1],si64> %3924 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8466_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3925 = torch.operator "onnx.Unsqueeze"(%3918, %3924) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3926 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8468_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3927 = torch.operator "onnx.Unsqueeze"(%3921, %3926) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3928 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3929 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3930 = torch.operator "onnx.Concat"(%3923, %3925, %3927, %3928, %3929) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %3931 = torch.operator "onnx.Reshape"(%3910, %3930) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3932 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %3933:2 = torch.operator "onnx.Split"(%3931, %3932) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %3934 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3935 = 
torch.operator "onnx.Squeeze"(%3933#0, %3934) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3936 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3937 = torch.operator "onnx.Squeeze"(%3933#1, %3936) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3938 = torch.operator "onnx.Neg"(%3937) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3939 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3940 = torch.operator "onnx.Unsqueeze"(%3938, %3939) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3941 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3942 = torch.operator "onnx.Unsqueeze"(%3935, %3941) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3943 = torch.operator "onnx.Concat"(%3940, %3942) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3944 = torch.operator "onnx.Shape"(%3943) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %3945 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3946 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3947 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.6_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3948 = torch.operator "onnx.Slice"(%3944, %3946, %3947, %3945) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %3949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3950 = torch.operator "onnx.Concat"(%3948, %3949) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3951 = torch.operator "onnx.Reshape"(%3943, %3950) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3952 = torch.operator "onnx.Cast"(%3910) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %3953 = torch.operator "onnx.Mul"(%3952, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3954 = torch.operator "onnx.Cast"(%3951) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %3955 = torch.operator "onnx.Mul"(%3954, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3956 = torch.operator "onnx.Add"(%3953, %3955) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3957 = torch.operator "onnx.Cast"(%3956) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %3958 = torch.operator "onnx.Shape"(%3911) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3959 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_29_attr__value> : tensor} : () -> 
!torch.vtensor<[],si64> %3960 = torch.operator "onnx.Gather"(%3958, %3959) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3961 = torch.operator "onnx.Shape"(%3911) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3962 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_30_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3963 = torch.operator "onnx.Gather"(%3961, %3962) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3964 = torch.operator "onnx.Shape"(%3911) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %3965 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_31_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %3966 = torch.operator "onnx.Gather"(%3964, %3965) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %3967 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8509_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3968 = torch.operator "onnx.Unsqueeze"(%3960, %3967) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3969 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8511_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3970 = torch.operator "onnx.Unsqueeze"(%3963, %3969) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3971 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8513_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3972 = torch.operator "onnx.Unsqueeze"(%3966, %3971) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %3973 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.6_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3974 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3975 = torch.operator "onnx.Concat"(%3968, %3970, %3972, %3973, %3974) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %3976 = torch.operator "onnx.Reshape"(%3911, %3975) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3977 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %3978:2 = torch.operator "onnx.Split"(%3976, %3977) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %3979 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3980 = torch.operator "onnx.Squeeze"(%3978#0, %3979) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3981 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3982 = torch.operator "onnx.Squeeze"(%3978#1, %3981) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3983 = torch.operator "onnx.Neg"(%3982) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %3984 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.6_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3985 = torch.operator "onnx.Unsqueeze"(%3983, %3984) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3986 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3987 = torch.operator "onnx.Unsqueeze"(%3980, %3986) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %3988 = torch.operator "onnx.Concat"(%3985, %3987) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %3989 = torch.operator "onnx.Shape"(%3988) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %3990 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3991 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3992 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3993 = torch.operator "onnx.Slice"(%3989, %3991, %3992, %3990) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %3994 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %3995 = torch.operator "onnx.Concat"(%3993, %3994) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %3996 = 
torch.operator "onnx.Reshape"(%3988, %3995) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %3997 = torch.operator "onnx.Cast"(%3911) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %3998 = torch.operator "onnx.Mul"(%3997, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %3999 = torch.operator "onnx.Cast"(%3996) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %4000 = torch.operator "onnx.Mul"(%3999, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4001 = torch.operator "onnx.Add"(%3998, %4000) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4002 = torch.operator "onnx.Cast"(%4001) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %4003 = torch.operator "onnx.Shape"(%3957) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %4004 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4005 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4006 = torch.operator "onnx.Slice"(%4003, %4004, %4005) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4007 = torch.operator "onnx.Cast"(%4006) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %4008 = torch.operator "onnx.Sqrt"(%4007) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %4009 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.6_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %4010 = torch.operator "onnx.Cast"(%4008) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %4011 = torch.operator "onnx.Div"(%4009, %4010) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %4012 = torch.operator "onnx.Cast"(%4011) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %4013 = torch.operator "onnx.Transpose"(%4002) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %4014 = torch.operator "onnx.Sqrt"(%4012) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %4015 = torch.operator "onnx.Mul"(%3957, %4014) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %4016 = torch.operator "onnx.Sqrt"(%4012) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %4017 = torch.operator "onnx.Mul"(%4013, %4016) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %4018 = torch.operator "onnx.MatMul"(%4015, %4017) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %4019 = torch.operator "onnx.Softmax"(%4018) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %4020 = torch.operator "onnx.MatMul"(%4019, %3912) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %4021 = torch.operator "onnx.Transpose"(%4020) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %4022 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_46_attr__value> : tensor} : () 
-> !torch.vtensor<[],si64> %4023 = torch.operator "onnx.Mul"(%3797, %4022) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4024 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8566_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4025 = torch.operator "onnx.Unsqueeze"(%3784, %4024) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4026 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4027 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8569_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4028 = torch.operator "onnx.Unsqueeze"(%4023, %4027) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4029 = torch.operator "onnx.Concat"(%4025, %4026, %4028) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %4030 = torch.operator "onnx.Reshape"(%4021, %4029) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %4031 = torch.operator "onnx.Cast"(%4030) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %4032 = torch.operator "onnx.Shape"(%3781) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %4033 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4034 = torch.operator "onnx.Gather"(%4032, %4033) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4035 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_49_attr__value> 
: tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4036 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4037 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4038 = torch.operator "onnx.Unsqueeze"(%4034, %4037) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4039 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4040 = torch.operator "onnx.Slice"(%4031, %4036, %4038, %4035, %4039) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %4041 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4042 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4043 = torch.operator "onnx.Unsqueeze"(%4034, %4042) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4044 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4045 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4046 = torch.operator "onnx.Slice"(%4031, %4043, %4044, %4041, %4045) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %4047 = torch.operator "onnx.MatMul"(%4046, %819) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4048 = torch.operator "onnx.Add"(%146, %4047) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4049 = torch.operator "onnx.MatMul"(%4040, %820) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4050 = torch.operator "onnx.Add"(%147, %4049) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4051 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4052 = torch.operator "onnx.Unsqueeze"(%3723, %4051) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4053 = torch.operator "onnx.Mul"(%4052, %4048) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4054 = torch.operator "onnx.Add"(%3669, %4053) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.62Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.6/norm2/Constant_attr__value" : tensor<3072xbf16> %4055 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.62Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.62Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.6/norm2/Constant_1_attr__value" : tensor<3072xbf16> %4056 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.62Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %4057 = torch.operator "onnx.LayerNormalization"(%4054, %4055, %4056) {torch.onnx.axis = -1 : si64, 
torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4058 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4059 = torch.operator "onnx.Unsqueeze"(%3729, %4058) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4060 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4061 = torch.operator "onnx.Add"(%4059, %4060) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %4062 = torch.operator "onnx.Mul"(%4057, %4061) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4063 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4064 = torch.operator "onnx.Unsqueeze"(%3726, %4063) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4065 = torch.operator "onnx.Add"(%4062, %4064) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4066 = torch.operator "onnx.MatMul"(%4065, %821) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4067 = torch.operator "onnx.Add"(%150, %4066) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4068 = torch.operator "onnx.Mul"(%4067, %4067) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4069 = torch.operator "onnx.Mul"(%4067, %4068) : (!torch.vtensor<[?,4096,12288],bf16>, 
!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4070 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_ff_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4071 = torch.operator "onnx.Mul"(%4070, %4069) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4072 = torch.operator "onnx.Add"(%4067, %4071) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4073 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4074 = torch.operator "onnx.Mul"(%4073, %4072) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4075 = torch.operator "onnx.Tanh"(%4074) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4076 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4077 = torch.operator "onnx.Add"(%4076, %4075) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4078 = torch.operator "onnx.Mul"(%4067, %4077) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4079 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4080 = torch.operator "onnx.Mul"(%4079, %4078) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4081 = torch.operator "onnx.MatMul"(%4080, %822) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4082 = 
torch.operator "onnx.Add"(%151, %4081) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4083 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4084 = torch.operator "onnx.Unsqueeze"(%3732, %4083) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4085 = torch.operator "onnx.Mul"(%4084, %4082) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4086 = torch.operator "onnx.Add"(%4054, %4085) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4087 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4088 = torch.operator "onnx.Unsqueeze"(%3761, %4087) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4089 = torch.operator "onnx.Mul"(%4088, %4050) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4090 = torch.operator "onnx.Add"(%3705, %4089) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.62Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.6/norm2_context/Constant_attr__value" : tensor<3072xbf16> %4091 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.62Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.62Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.6/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %4092 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.62Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> 
!torch.vtensor<[3072],bf16> %4093 = torch.operator "onnx.LayerNormalization"(%4090, %4091, %4092) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4094 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4095 = torch.operator "onnx.Unsqueeze"(%3767, %4094) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4096 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4097 = torch.operator "onnx.Add"(%4095, %4096) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %4098 = torch.operator "onnx.Mul"(%4093, %4097) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4099 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4100 = torch.operator "onnx.Unsqueeze"(%3764, %4099) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4101 = torch.operator "onnx.Add"(%4098, %4100) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4102 = torch.operator "onnx.MatMul"(%4101, %823) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4103 = torch.operator "onnx.Add"(%152, %4102) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4104 = torch.operator "onnx.Mul"(%4103, %4103) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> 
!torch.vtensor<[?,512,12288],bf16> %4105 = torch.operator "onnx.Mul"(%4103, %4104) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4106 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4107 = torch.operator "onnx.Mul"(%4106, %4105) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4108 = torch.operator "onnx.Add"(%4103, %4107) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4109 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4110 = torch.operator "onnx.Mul"(%4109, %4108) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4111 = torch.operator "onnx.Tanh"(%4110) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4112 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4113 = torch.operator "onnx.Add"(%4112, %4111) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4114 = torch.operator "onnx.Mul"(%4103, %4113) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4115 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4116 = torch.operator "onnx.Mul"(%4115, %4114) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4117 = torch.operator 
"onnx.MatMul"(%4116, %824) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4118 = torch.operator "onnx.Add"(%153, %4117) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4119 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.6_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4120 = torch.operator "onnx.Unsqueeze"(%3770, %4119) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4121 = torch.operator "onnx.Mul"(%4120, %4118) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4122 = torch.operator "onnx.Add"(%4090, %4121) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4123 = torch.operator "onnx.Gemm"(%1285, %154, %155) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %4124 = torch.operator "onnx.Shape"(%4123) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %4125 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4126 = torch.operator "onnx.Gather"(%4124, %4125) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4127 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4128 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64>
// ^ result type completing the op begun on the previous line.
//
// transformer_blocks.7 norm1: chunk size %4131 = (%4126 + %4128) / %4130.
// Each slice bound below is %4131 times a constant; all slices are taken from
// %4123 along the axis given by %4125, each starting where the previous ended.
%4129 = torch.operator "onnx.Add"(%4126, %4128) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4131 = torch.operator "onnx.Div"(%4129, %4130) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// Slice 1: [%4127, %4133)
%4132 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4133 = torch.operator "onnx.Mul"(%4131, %4132) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4134 = torch.operator "onnx.Slice"(%4123, %4127, %4133, %4125) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Slice 2: [%4133, %4136)
%4135 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4136 = torch.operator "onnx.Mul"(%4131, %4135) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4137 = torch.operator "onnx.Slice"(%4123, %4133, %4136, %4125) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Slice 3: [%4136, %4139)
%4138 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4139 = torch.operator "onnx.Mul"(%4131, %4138) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4140 = torch.operator "onnx.Slice"(%4123, %4136, %4139, %4125) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) ->
!torch.vtensor<[?,?],bf16>
// ^ result type completing the op begun on the previous line.
//
// transformer_blocks.7 norm1: three more consecutive slices of %4123 (bounds are
// %4131 times a constant), then LayerNormalization of %4086 followed by
// (ln * (slice %4137 + c)) + slice %4134, both slices broadcast via Unsqueeze.
// Fix: Constant_11 had a bare `tensor` attribute type; restored to tensor<bf16>
// to agree with its [],bf16 result.
%4141 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4142 = torch.operator "onnx.Mul"(%4131, %4141) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4143 = torch.operator "onnx.Slice"(%4123, %4139, %4142, %4125) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%4144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4145 = torch.operator "onnx.Mul"(%4131, %4144) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4146 = torch.operator "onnx.Slice"(%4123, %4142, %4145, %4125) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%4147 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4148 = torch.operator "onnx.Mul"(%4131, %4147) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4149 = torch.operator "onnx.Slice"(%4123, %4145, %4148, %4125) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// LayerNormalization scale and bias operands are loaded from util globals and
// converted to torch value tensors.
%_2Ftransformer_blocks.72Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.7/norm1/norm/Constant_attr__value" : tensor<3072xbf16>
%4150 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.72Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.72Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.7/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
%4151 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.72Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%4152 = torch.operator "onnx.LayerNormalization"(%4086, %4150, %4151) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Multiplicative term: (unsqueezed %4137 + scalar constant) * normalized input.
%4153 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4154 = torch.operator "onnx.Unsqueeze"(%4137, %4153) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%4155 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%4156 = torch.operator "onnx.Add"(%4154, %4155) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%4157 = torch.operator "onnx.Mul"(%4152, %4156) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Additive term: unsqueezed %4134.
%4158 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4159 = torch.operator "onnx.Unsqueeze"(%4134, %4158) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%4160 = torch.operator "onnx.Add"(%4157, %4159) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// transformer_blocks.7 norm1_context: Gemm of %1285 against %156 (transB = 1)
// plus bias %157; its type signature continues on the next line.
%4161 = torch.operator "onnx.Gemm"(%1285, %156, %157) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>,
!torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16>
// ^ remainder of the type signature of the op begun on the previous line.
//
// transformer_blocks.7 norm1_context: gather one entry of %4161's shape, derive
// the chunk size %4169 = (%4164 + %4166) / %4168, and take the first slice of
// %4161 along the axis given by %4163.
%4162 = torch.operator "onnx.Shape"(%4161) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64>
%4163 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4164 = torch.operator "onnx.Gather"(%4162, %4163) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4165 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4166 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4167 = torch.operator "onnx.Add"(%4164, %4166) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4168 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4169 = torch.operator "onnx.Div"(%4167, %4168) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// Slice 1: [%4165, %4171)
%4170 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4171 = torch.operator "onnx.Mul"(%4169, %4170) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4172 = torch.operator "onnx.Slice"(%4161, %4165, %4171, %4163) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%4173 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.7_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ^ attribute and type completing the constant op begun on the previous line.
//
// transformer_blocks.7 norm1_context: further consecutive slices of %4161 along
// the axis given by %4163. Each upper bound is %4169 times a constant and
// becomes the lower bound of the next slice.
// Slice 2: [%4171, %4174)
%4174 = torch.operator "onnx.Mul"(%4169, %4173) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4175 = torch.operator "onnx.Slice"(%4161, %4171, %4174, %4163) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Slice 3: [%4174, %4177)
%4176 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4177 = torch.operator "onnx.Mul"(%4169, %4176) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4178 = torch.operator "onnx.Slice"(%4161, %4174, %4177, %4163) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Slice 4: [%4177, %4180)
%4179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4180 = torch.operator "onnx.Mul"(%4169, %4179) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4181 = torch.operator "onnx.Slice"(%4161, %4177, %4180, %4163) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Slice 5: [%4180, %4183) — its type signature continues on the next line.
%4182 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4183 = torch.operator "onnx.Mul"(%4169, %4182) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4184 = torch.operator "onnx.Slice"(%4161, %4180, %4183, %4163) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>,
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// ^ remainder of the type signature of the op begun on the previous line.
//
// transformer_blocks.7 norm1_context: last slice of %4161, then
// LayerNormalization of %4122 with scale/bias loaded from util globals, and the
// start of the multiplicative term (unsqueezed slice %4175).
// Fix: Constant_11 had a bare `tensor` attribute type; restored to tensor<bf16>,
// the element type of the [],bf16 result that this op declares on the next line.
%4185 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4186 = torch.operator "onnx.Mul"(%4169, %4185) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4187 = torch.operator "onnx.Slice"(%4161, %4183, %4186, %4163) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%_2Ftransformer_blocks.72Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.7/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16>
%4188 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.72Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.72Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.7/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16>
%4189 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.72Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%4190 = torch.operator "onnx.LayerNormalization"(%4122, %4188, %4189) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4191 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4192 = torch.operator "onnx.Unsqueeze"(%4175, %4191) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%4193 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_11_attr__value> : tensor<bf16>} :
() -> !torch.vtensor<[],bf16>
// ^ function type completing the constant op begun on the previous line.
//
// transformer_blocks.7: finish the norm1_context modulation
// (%4190 * (%4192 + %4193)) + unsqueezed %4172, then start attention:
// gather one entry of %4198's shape, and apply three 3072x3072 projections of
// %4160 (weights %825/%826/%827, biases %160/%161/%162).
// Fix: attn_Constant had a bare `tensor` attribute type; restored to
// tensor<si64> to agree with its [],si64 result.
%4194 = torch.operator "onnx.Add"(%4192, %4193) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%4195 = torch.operator "onnx.Mul"(%4190, %4194) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4196 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4197 = torch.operator "onnx.Unsqueeze"(%4172, %4196) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%4198 = torch.operator "onnx.Add"(%4195, %4197) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4199 = torch.operator "onnx.Shape"(%4198) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%4200 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%4201 = torch.operator "onnx.Gather"(%4199, %4200) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%4202 = torch.operator "onnx.MatMul"(%4160, %825) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4203 = torch.operator "onnx.Add"(%160, %4202) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4204 = torch.operator "onnx.MatMul"(%4160, %826) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4205 = torch.operator "onnx.Add"(%161, %4204) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4206 = torch.operator "onnx.MatMul"(%4160, %827) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) ->
!torch.vtensor<[?,4096,3072],bf16>
// ^ result type completing the op begun on the previous line.
//
// transformer_blocks.7 attn: add bias %162 to the third projection, then compute
// the scalar %4214 = (one entry of %4205's shape) / %4211, passed through two
// si64 -> si64 casts (ONNX `to = 7`). %4216 and the constants that follow begin
// the first 4-element reshape target.
// Fix: attn_Constant_1 and attn_Constant_2 had a bare `tensor` attribute type;
// restored to tensor<si64> to agree with their [],si64 results.
%4207 = torch.operator "onnx.Add"(%162, %4206) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4208 = torch.operator "onnx.Shape"(%4205) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
%4209 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%4210 = torch.operator "onnx.Gather"(%4208, %4209) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%4211 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%4212 = torch.operator "onnx.Div"(%4210, %4211) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%4213 = torch.operator "onnx.Cast"(%4212) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%4214 = torch.operator "onnx.Cast"(%4213) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%4215 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8757_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4216 = torch.operator "onnx.Unsqueeze"(%4201, %4215) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4218 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4219 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8761_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4220 =
torch.operator "onnx.Unsqueeze"(%4214, %4219) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// ^ right-hand side of the assignment begun on the previous line.
//
// transformer_blocks.7 attn: 4-element reshape targets, each assembled as
// Concat(unsqueezed %4201, constant, constant, unsqueezed %4214) along axis 0.
// Target 1 -> %4221
%4221 = torch.operator "onnx.Concat"(%4216, %4217, %4218, %4220) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Target 2 -> %4228
%4222 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8764_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4223 = torch.operator "onnx.Unsqueeze"(%4201, %4222) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4225 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4226 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8768_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4227 = torch.operator "onnx.Unsqueeze"(%4214, %4226) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4228 = torch.operator "onnx.Concat"(%4223, %4224, %4225, %4227) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Target 3 begins here; its remaining operands follow this line.
%4229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8771_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4230 = torch.operator "onnx.Unsqueeze"(%4201, %4229) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4231 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// transformer_blocks.7 attn: more 4-element reshape targets, each assembled as
// Concat(unsqueezed %4201, constant, constant, unsqueezed %4214) along axis 0.
// Target 3 -> %4235 (%4230 and %4231 are defined before this line).
%4232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4233 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8775_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4234 = torch.operator "onnx.Unsqueeze"(%4214, %4233) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4235 = torch.operator "onnx.Concat"(%4230, %4231, %4232, %4234) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Target 4 -> %4242
%4236 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8778_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4237 = torch.operator "onnx.Unsqueeze"(%4201, %4236) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4238 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4239 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4240 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8782_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4241 = torch.operator "onnx.Unsqueeze"(%4214, %4240) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4242 = torch.operator "onnx.Concat"(%4237, %4238, %4239, %4241) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Axes constant for the next Unsqueeze; its result type is on the next line.
%4243 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8785_attr__value> : tensor<1xsi64>} : () ->
!torch.vtensor<[1],si64>
// ^ result type completing the constant op begun on the previous line.
//
// transformer_blocks.7 attn: last 4-element reshape targets, each assembled as
// Concat(unsqueezed %4201, constant, constant, unsqueezed %4214) along axis 0.
// Target 5 -> %4249
%4244 = torch.operator "onnx.Unsqueeze"(%4201, %4243) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4245 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4246 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4247 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8789_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4248 = torch.operator "onnx.Unsqueeze"(%4214, %4247) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4249 = torch.operator "onnx.Concat"(%4244, %4245, %4246, %4248) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Target 6 -> %4256 (the Concat itself is on the next line).
%4250 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8792_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4251 = torch.operator "onnx.Unsqueeze"(%4201, %4250) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4252 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4253 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4254 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8796_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4255 = torch.operator "onnx.Unsqueeze"(%4214, %4254) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4256 =
torch.operator "onnx.Concat"(%4251, %4252, %4253, %4255) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// ^ right-hand side of the assignment begun on the previous line.
//
// transformer_blocks.7 attn: reshape each of the three projections (%4203,
// %4205, %4207) to rank 4 and swap axes 1 and 2 (perm = [0, 2, 1, 3]).
%4257 = torch.operator "onnx.Reshape"(%4203, %4221) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4258 = torch.operator "onnx.Transpose"(%4257) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%4259 = torch.operator "onnx.Reshape"(%4205, %4228) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4260 = torch.operator "onnx.Transpose"(%4259) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%4261 = torch.operator "onnx.Reshape"(%4207, %4235) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4262 = torch.operator "onnx.Transpose"(%4261) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// norm_q, first half: cast %4258 to f32 (ONNX `to = 1`), raise to the power
// %4264, and take the mean over the last axis (keepdims).
// Fix: norm_q_Constant had a bare `tensor` attribute type; restored to
// tensor<f32> to agree with its [],f32 result.
%4263 = torch.operator "onnx.Cast"(%4258) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4265 = torch.operator "onnx.Pow"(%4263, %4264) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4266 = torch.operator "onnx.ReduceMean"(%4265) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4267 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.7_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// ^ attribute and type completing the constant op begun on the previous line.
//
// transformer_blocks.7 attn norm_q, second half:
//   %4271 = %4270 / sqrt(%4266 + %4267); the f32 cast of %4258 is scaled by it,
//   cast back to bf16 (ONNX `to = 16`) and multiplied by the [128] weight %158.
// NOTE(review): this has the shape of an RMS normalization; the constant values
// are in dense_resource blobs not visible here — confirm.
// Fix: the three rank-0 constants on this line (the one completed above,
// norm_q_Constant_2, norm_k_Constant) had a bare `tensor` attribute type;
// restored to tensor<f32> to agree with their [],f32 results.
%4268 = torch.operator "onnx.Add"(%4266, %4267) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4269 = torch.operator "onnx.Sqrt"(%4268) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4270 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4271 = torch.operator "onnx.Div"(%4270, %4269) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4272 = torch.operator "onnx.Cast"(%4258) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4273 = torch.operator "onnx.Mul"(%4272, %4271) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4274 = torch.operator "onnx.Cast"(%4273) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%4275 = torch.operator "onnx.Mul"(%4274, %158) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// norm_k, first half: same sequence applied to %4260.
%4276 = torch.operator "onnx.Cast"(%4260) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4277 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4278 = torch.operator "onnx.Pow"(%4276, %4277) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4279 = torch.operator "onnx.ReduceMean"(%4278) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4280 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.7_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// ^ attribute and type completing the constant op begun on the previous line.
//
// transformer_blocks.7 attn norm_k, second half:
//   %4284 = %4283 / sqrt(%4279 + %4280); the f32 cast of %4260 is scaled by it,
//   cast back to bf16 and multiplied by the [128] weight %159.
// Fix: both rank-0 constants on this line had a bare `tensor` attribute type;
// restored to tensor<f32> to agree with their [],f32 results.
%4281 = torch.operator "onnx.Add"(%4279, %4280) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4282 = torch.operator "onnx.Sqrt"(%4281) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4283 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4284 = torch.operator "onnx.Div"(%4283, %4282) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4285 = torch.operator "onnx.Cast"(%4260) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4286 = torch.operator "onnx.Mul"(%4285, %4284) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4287 = torch.operator "onnx.Cast"(%4286) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%4288 = torch.operator "onnx.Mul"(%4287, %159) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Three 3072x3072 projections of %4198 (weights %828/%829/%830). The biases are
// %165, %163 and %164 respectively (the third Add follows this line).
%4289 = torch.operator "onnx.MatMul"(%4198, %828) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4290 = torch.operator "onnx.Add"(%165, %4289) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4291 = torch.operator "onnx.MatMul"(%4198, %829) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4292 = torch.operator "onnx.Add"(%163, %4291) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4293 = torch.operator "onnx.MatMul"(%4198, %830) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) ->
!torch.vtensor<[?,512,3072],bf16>
// ^ result type completing the op begun on the previous line.
//
// transformer_blocks.7 attn: add bias %164 to the third projection of %4198,
// then reshape each projection (%4290, %4292, %4294) to rank 4 and swap axes 1
// and 2 (perm = [0, 2, 1, 3]).
%4294 = torch.operator "onnx.Add"(%164, %4293) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4295 = torch.operator "onnx.Reshape"(%4290, %4242) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4296 = torch.operator "onnx.Transpose"(%4295) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%4297 = torch.operator "onnx.Reshape"(%4292, %4249) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4298 = torch.operator "onnx.Transpose"(%4297) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%4299 = torch.operator "onnx.Reshape"(%4294, %4256) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4300 = torch.operator "onnx.Transpose"(%4299) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// norm_added_q, first half: cast %4296 to f32, raise to the power %4302, and
// take the mean over the last axis (keepdims).
// Fix: norm_added_q_Constant had a bare `tensor` attribute type; restored to
// tensor<f32> to agree with its [],f32 result.
%4301 = torch.operator "onnx.Cast"(%4296) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4302 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4303 = torch.operator "onnx.Pow"(%4301, %4302) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4304 = torch.operator "onnx.ReduceMean"(%4303) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4305 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.7_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// ^ attribute and type completing the constant op begun on the previous line.
//
// transformer_blocks.7 attn norm_added_q, second half:
//   %4309 = %4308 / sqrt(%4304 + %4305); the f32 cast of %4296 is scaled by it,
//   cast back to bf16 and multiplied by the [128] weight %168.
// Fix: the three rank-0 constants on this line had a bare `tensor` attribute
// type; restored to tensor<f32> to agree with their [],f32 results.
%4306 = torch.operator "onnx.Add"(%4304, %4305) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4307 = torch.operator "onnx.Sqrt"(%4306) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4308 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4309 = torch.operator "onnx.Div"(%4308, %4307) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4310 = torch.operator "onnx.Cast"(%4296) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4311 = torch.operator "onnx.Mul"(%4310, %4309) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4312 = torch.operator "onnx.Cast"(%4311) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%4313 = torch.operator "onnx.Mul"(%4312, %168) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// norm_added_k, first half: same sequence applied to %4298.
%4314 = torch.operator "onnx.Cast"(%4298) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4315 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4316 = torch.operator "onnx.Pow"(%4314, %4315) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4317 = torch.operator "onnx.ReduceMean"(%4316) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4318 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.7_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// ^ attribute and type completing the constant op begun on the previous line.
//
// transformer_blocks.7 attn norm_added_k, second half:
//   %4322 = %4321 / sqrt(%4317 + %4318); the f32 cast of %4298 is scaled by it,
//   cast back to bf16 and multiplied by the [128] weight %169.
// Fix: both rank-0 constants on this line had a bare `tensor` attribute type;
// restored to tensor<f32> to agree with their [],f32 results.
%4319 = torch.operator "onnx.Add"(%4317, %4318) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4320 = torch.operator "onnx.Sqrt"(%4319) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4322 = torch.operator "onnx.Div"(%4321, %4320) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4323 = torch.operator "onnx.Cast"(%4298) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4324 = torch.operator "onnx.Mul"(%4323, %4322) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4325 = torch.operator "onnx.Cast"(%4324) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%4326 = torch.operator "onnx.Mul"(%4325, %169) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenate along axis 2, operand derived from %4198 first and operand
// derived from %4160 second: the two normalized pairs, then the un-normalized
// third pair.
%4327 = torch.operator "onnx.Concat"(%4313, %4275) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%4328 = torch.operator "onnx.Concat"(%4326, %4288) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%4329 = torch.operator "onnx.Concat"(%4300, %4262) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%4330 = torch.operator "onnx.Shape"(%4327) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%4331 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.7_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    // Read three scalar dimensions out of the shape of %4327. Each Gather uses a scalar si64
    // index constant whose value is stored in an external resource (not visible here).
    %4332 = torch.operator "onnx.Gather"(%4330, %4331) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %4333 = torch.operator "onnx.Shape"(%4327) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %4334 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %4335 = torch.operator "onnx.Gather"(%4333, %4334) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %4336 = torch.operator "onnx.Shape"(%4327) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %4337 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %4338 = torch.operator "onnx.Gather"(%4336, %4337) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // Promote each scalar dimension to a 1-element vector so they can be concatenated
    // into a shape tensor.
    %4339 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8881_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4340 = torch.operator "onnx.Unsqueeze"(%4332, %4339) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %4341 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8883_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4342 = torch.operator "onnx.Unsqueeze"(%4335, %4341) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %4343 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8885_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4344 = torch.operator "onnx.Unsqueeze"(%4338, %4343) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) ->
!torch.vtensor<[1],si64> %4345 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4346 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4347 = torch.operator "onnx.Concat"(%4340, %4342, %4344, %4345, %4346) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %4348 = torch.operator "onnx.Reshape"(%4327, %4347) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %4349 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %4350:2 = torch.operator "onnx.Split"(%4348, %4349) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %4351 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4352 = torch.operator "onnx.Squeeze"(%4350#0, %4351) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4353 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4354 = torch.operator "onnx.Squeeze"(%4350#1, %4353) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4355 = torch.operator "onnx.Neg"(%4354) : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],bf16> %4356 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4357 = torch.operator "onnx.Unsqueeze"(%4355, %4356) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %4358 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4359 = torch.operator "onnx.Unsqueeze"(%4352, %4358) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %4360 = torch.operator "onnx.Concat"(%4357, %4359) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %4361 = torch.operator "onnx.Shape"(%4360) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %4362 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4363 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4364 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4365 = torch.operator "onnx.Slice"(%4361, %4363, %4364, %4362) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %4366 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4367 = torch.operator "onnx.Concat"(%4365, %4366) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
    // Collapse the 5-D tensor %4360 back to 4-D using the shape tensor %4367.
    %4368 = torch.operator "onnx.Reshape"(%4360, %4367) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
    // In f32: %4373 = %4327 * %1455 + %4368 * %1456, then cast back to bf16.
    // NOTE(review): %1455 / %1456 are [1,1,4608,128] tables defined above this chunk;
    // presumably the rotary cos / sin embeddings - confirm.
    %4369 = torch.operator "onnx.Cast"(%4327) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
    %4370 = torch.operator "onnx.Mul"(%4369, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %4371 = torch.operator "onnx.Cast"(%4368) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %4372 = torch.operator "onnx.Mul"(%4371, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %4373 = torch.operator "onnx.Add"(%4370, %4372) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
    %4374 = torch.operator "onnx.Cast"(%4373) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
    // Begin the same sequence for %4328: read scalar dimensions out of its shape. The index
    // constants are scalar si64 values stored in external resources (not visible here).
    %4375 = torch.operator "onnx.Shape"(%4328) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %4376 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %4377 = torch.operator "onnx.Gather"(%4375, %4376) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %4378 = torch.operator "onnx.Shape"(%4328) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %4379 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %4380 = torch.operator "onnx.Gather"(%4378, %4379) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>,
!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // Third scalar dimension of %4328; the index is a scalar si64 constant stored in an
    // external resource (value not visible here).
    %4381 = torch.operator "onnx.Shape"(%4328) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %4382 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %4383 = torch.operator "onnx.Gather"(%4381, %4382) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // Promote the three gathered dimensions to 1-element vectors.
    %4384 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8926_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4385 = torch.operator "onnx.Unsqueeze"(%4377, %4384) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %4386 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8928_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4387 = torch.operator "onnx.Unsqueeze"(%4380, %4386) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %4388 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8930_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4389 = torch.operator "onnx.Unsqueeze"(%4383, %4388) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    // Append two constant entries and concatenate into the 5-element reshape target %4392.
    %4390 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4391 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4392 = torch.operator "onnx.Concat"(%4385, %4387, %4389, %4390, %4391) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>
    %4393 =
torch.operator "onnx.Reshape"(%4328, %4392) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %4394 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %4395:2 = torch.operator "onnx.Split"(%4393, %4394) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %4396 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4397 = torch.operator "onnx.Squeeze"(%4395#0, %4396) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4398 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4399 = torch.operator "onnx.Squeeze"(%4395#1, %4398) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4400 = torch.operator "onnx.Neg"(%4399) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %4401 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4402 = torch.operator "onnx.Unsqueeze"(%4400, %4401) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %4403 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4404 = torch.operator "onnx.Unsqueeze"(%4397, %4403) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?,1],bf16> %4405 = torch.operator "onnx.Concat"(%4402, %4404) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %4406 = torch.operator "onnx.Shape"(%4405) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %4407 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4408 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4410 = torch.operator "onnx.Slice"(%4406, %4408, %4409, %4407) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %4411 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4412 = torch.operator "onnx.Concat"(%4410, %4411) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %4413 = torch.operator "onnx.Reshape"(%4405, %4412) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4414 = torch.operator "onnx.Cast"(%4328) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %4415 = torch.operator "onnx.Mul"(%4414, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4416 = torch.operator "onnx.Cast"(%4413) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],f32> %4417 = torch.operator "onnx.Mul"(%4416, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4418 = torch.operator "onnx.Add"(%4415, %4417) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4419 = torch.operator "onnx.Cast"(%4418) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %4420 = torch.operator "onnx.Shape"(%4374) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %4421 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4422 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4423 = torch.operator "onnx.Slice"(%4420, %4421, %4422) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4424 = torch.operator "onnx.Cast"(%4423) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %4425 = torch.operator "onnx.Sqrt"(%4424) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %4426 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %4427 = torch.operator "onnx.Cast"(%4425) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %4428 = torch.operator "onnx.Div"(%4426, %4427) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %4429 = torch.operator "onnx.Cast"(%4428) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %4430 = torch.operator "onnx.Transpose"(%4419) {torch.onnx.perm = [0 : si64, 1 : 
si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
    // Both matmul operands (%4374 and the transposed %4430) are multiplied by sqrt(%4429)
    // before the product, so the scores carry one factor of %4429 overall.
    %4431 = torch.operator "onnx.Sqrt"(%4429) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
    %4432 = torch.operator "onnx.Mul"(%4374, %4431) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
    %4433 = torch.operator "onnx.Sqrt"(%4429) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
    %4434 = torch.operator "onnx.Mul"(%4430, %4433) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
    // Scores, softmax over the last axis, then the weighted sum with %4329.
    %4435 = torch.operator "onnx.MatMul"(%4432, %4434) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
    %4436 = torch.operator "onnx.Softmax"(%4435) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
    %4437 = torch.operator "onnx.MatMul"(%4436, %4329) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
    // Swap axes 1 and 2 of the attention output.
    %4438 = torch.operator "onnx.Transpose"(%4437) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
    // Build the 3-element reshape target: %4201, a constant entry, and %4214 * %4439.
    // NOTE(review): %4201 / %4214 are scalars defined above this chunk; presumably the batch
    // size and head count, with %4439 the per-head width - confirm.
    %4439 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_46_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %4440 = torch.operator "onnx.Mul"(%4214, %4439) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %4441 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8983_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4442 = torch.operator "onnx.Unsqueeze"(%4201, %4441) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %4443 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_47_attr__value> :
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    // Finish the 3-element shape tensor and reshape the transposed attention output %4438.
    %4444 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_8986_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4445 = torch.operator "onnx.Unsqueeze"(%4440, %4444) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %4446 = torch.operator "onnx.Concat"(%4442, %4443, %4445) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
    %4447 = torch.operator "onnx.Reshape"(%4438, %4446) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
    // bf16 -> bf16 cast, kept exactly as exported.
    %4448 = torch.operator "onnx.Cast"(%4447) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
    // Read one dimension of %4198 ([?,512,3072]); it is later used as a slice boundary on
    // %4448. The index is a scalar si64 constant stored in an external resource.
    // NOTE(review): presumably the 512-long sequence dimension - confirm.
    %4449 = torch.operator "onnx.Shape"(%4198) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
    %4450 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_48_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %4451 = torch.operator "onnx.Gather"(%4449, %4450) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %4452 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4453 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4454 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4455 = torch.operator "onnx.Unsqueeze"(%4451, %4454) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %4456 = torch.operator
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4457 = torch.operator "onnx.Slice"(%4448, %4453, %4455, %4452, %4456) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %4458 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4459 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4460 = torch.operator "onnx.Unsqueeze"(%4451, %4459) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4461 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4462 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4463 = torch.operator "onnx.Slice"(%4448, %4460, %4461, %4458, %4462) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %4464 = torch.operator "onnx.MatMul"(%4463, %831) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4465 = torch.operator "onnx.Add"(%166, %4464) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4466 = torch.operator "onnx.MatMul"(%4457, %832) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4467 = torch.operator "onnx.Add"(%167, %4466) : 
(!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4469 = torch.operator "onnx.Unsqueeze"(%4140, %4468) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4470 = torch.operator "onnx.Mul"(%4469, %4465) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4471 = torch.operator "onnx.Add"(%4086, %4470) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.72Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.7/norm2/Constant_attr__value" : tensor<3072xbf16> %4472 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.72Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.72Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.7/norm2/Constant_1_attr__value" : tensor<3072xbf16> %4473 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.72Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %4474 = torch.operator "onnx.LayerNormalization"(%4471, %4472, %4473) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4475 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4476 = torch.operator "onnx.Unsqueeze"(%4146, %4475) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4477 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.7_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    // Modulate the LayerNorm output %4474: multiply by (%4476 + %4477), then add %4143
    // unsqueezed to [?,1,?]. NOTE(review): %4477 is a scalar bf16 constant whose payload is
    // external; presumably 1.0 for the usual "1 + scale" form - confirm.
    %4478 = torch.operator "onnx.Add"(%4476, %4477) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %4479 = torch.operator "onnx.Mul"(%4474, %4478) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %4480 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4481 = torch.operator "onnx.Unsqueeze"(%4143, %4480) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %4482 = torch.operator "onnx.Add"(%4479, %4481) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // Feed-forward up-projection 3072 -> 12288 with bias %170.
    %4483 = torch.operator "onnx.MatMul"(%4482, %833) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %4484 = torch.operator "onnx.Add"(%170, %4483) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    // Activation, first half: x + c0 * x^3 with x = %4484 and c0 = %4487. The op sequence
    // matches the tanh form of GELU; the constant payloads are external and not visible here.
    %4485 = torch.operator "onnx.Mul"(%4484, %4484) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %4486 = torch.operator "onnx.Mul"(%4484, %4485) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %4487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4488 = torch.operator "onnx.Mul"(%4487, %4486) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %4489 = torch.operator "onnx.Add"(%4484, %4488) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) ->
!torch.vtensor<[?,4096,12288],bf16>
    // Activation, second half: c3 * x * (c2 + tanh(c1 * %4489)) with x = %4484. The op
    // sequence matches the tanh form of GELU; c1 / c2 / c3 are scalar bf16 constants whose
    // payloads are external and not visible here.
    %4490 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4491 = torch.operator "onnx.Mul"(%4490, %4489) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %4492 = torch.operator "onnx.Tanh"(%4491) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %4493 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4494 = torch.operator "onnx.Add"(%4493, %4492) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %4495 = torch.operator "onnx.Mul"(%4484, %4494) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    %4496 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4497 = torch.operator "onnx.Mul"(%4496, %4495) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
    // Feed-forward down-projection 12288 -> 3072 with bias %171.
    %4498 = torch.operator "onnx.MatMul"(%4497, %834) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %4499 = torch.operator "onnx.Add"(%171, %4498) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // Multiply the feed-forward output by %4149 unsqueezed to [?,1,?].
    %4500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4501 = torch.operator "onnx.Unsqueeze"(%4149, %4500) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %4502 = torch.operator "onnx.Mul"(%4501, %4499) :
(!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // Residual add on the [?,4096,3072] stream: %4471 plus the gated feed-forward output %4502.
    %4503 = torch.operator "onnx.Add"(%4471, %4502) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // [?,512,3072] stream: multiply the projection %4467 by %4178 (unsqueezed to [?,1,?])
    // and add the result to %4122.
    %4504 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4505 = torch.operator "onnx.Unsqueeze"(%4178, %4504) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %4506 = torch.operator "onnx.Mul"(%4505, %4467) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
    %4507 = torch.operator "onnx.Add"(%4122, %4506) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // LayerNormalization over the last axis; the two 3072-element parameters are loaded
    // from the norm2_context globals.
    %_2Ftransformer_blocks.72Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.7/norm2_context/Constant_attr__value" : tensor<3072xbf16>
    %4508 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.72Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
    %_2Ftransformer_blocks.72Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.7/norm2_context/Constant_1_attr__value" : tensor<3072xbf16>
    %4509 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.72Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
    %4510 = torch.operator "onnx.LayerNormalization"(%4507, %4508, %4509) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Modulate the normalized tensor: multiply by (%4184 + %4513), then add %4181, both
    // unsqueezed to [?,1,?]. NOTE(review): %4513 is a scalar bf16 constant whose payload is
    // external; presumably 1.0 for the usual "1 + scale" form - confirm.
    %4511 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4512 = torch.operator "onnx.Unsqueeze"(%4184, %4511) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %4513 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4514 = torch.operator "onnx.Add"(%4512, %4513) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %4515 = torch.operator "onnx.Mul"(%4510, %4514) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %4516 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4517 = torch.operator "onnx.Unsqueeze"(%4181, %4516) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %4518 = torch.operator "onnx.Add"(%4515, %4517) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Context feed-forward up-projection 3072 -> 12288 with bias %172.
    %4519 = torch.operator "onnx.MatMul"(%4518, %835) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %4520 = torch.operator "onnx.Add"(%172, %4519) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    // Activation, first half: c0 * x^3 with x = %4520 and c0 = %4523 (scalar bf16 constant,
    // payload external and not visible here).
    %4521 = torch.operator "onnx.Mul"(%4520, %4520) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %4522 = torch.operator "onnx.Mul"(%4520, %4521) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %4523 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4524 = torch.operator "onnx.Mul"(%4523, %4522) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) ->
!torch.vtensor<[?,512,12288],bf16>
    // Activation, second half: c3 * x * (c2 + tanh(c1 * (x + %4524))) with x = %4520. The op
    // sequence matches the tanh form of GELU; c1 / c2 / c3 are scalar bf16 constants whose
    // payloads are external and not visible here.
    %4525 = torch.operator "onnx.Add"(%4520, %4524) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %4526 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4527 = torch.operator "onnx.Mul"(%4526, %4525) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %4528 = torch.operator "onnx.Tanh"(%4527) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %4529 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4530 = torch.operator "onnx.Add"(%4529, %4528) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %4531 = torch.operator "onnx.Mul"(%4520, %4530) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %4532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %4533 = torch.operator "onnx.Mul"(%4532, %4531) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    // Context feed-forward down-projection 12288 -> 3072 with bias %173.
    %4534 = torch.operator "onnx.MatMul"(%4533, %836) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %4535 = torch.operator "onnx.Add"(%173, %4534) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Axes constant for the Unsqueeze that starts here and continues on the next line.
    %4536 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.7_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %4537 = torch.operator
"onnx.Unsqueeze"(%4187, %4536) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4538 = torch.operator "onnx.Mul"(%4537, %4535) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4539 = torch.operator "onnx.Add"(%4507, %4538) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4540 = torch.operator "onnx.Gemm"(%1285, %174, %175) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %4541 = torch.operator "onnx.Shape"(%4540) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %4542 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4543 = torch.operator "onnx.Gather"(%4541, %4542) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4544 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4545 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4546 = torch.operator "onnx.Add"(%4543, %4545) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4547 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4548 = torch.operator "onnx.Div"(%4546, %4547) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4549 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4550 = torch.operator "onnx.Mul"(%4548, %4549) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4551 = torch.operator "onnx.Slice"(%4540, %4544, %4550, %4542) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4552 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4553 = torch.operator "onnx.Mul"(%4548, %4552) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4554 = torch.operator "onnx.Slice"(%4540, %4550, %4553, %4542) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4555 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4556 = torch.operator "onnx.Mul"(%4548, %4555) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4557 = torch.operator "onnx.Slice"(%4540, %4553, %4556, %4542) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4558 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4559 = torch.operator "onnx.Mul"(%4548, %4558) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4560 = torch.operator "onnx.Slice"(%4540, %4556, %4559, %4542) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4561 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4562 = torch.operator "onnx.Mul"(%4548, %4561) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4563 = torch.operator "onnx.Slice"(%4540, %4559, %4562, %4542) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4564 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4565 = torch.operator "onnx.Mul"(%4548, %4564) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4566 = torch.operator "onnx.Slice"(%4540, %4562, %4565, %4542) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.82Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.8/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %4567 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.82Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.82Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.8/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %4568 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.82Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %4569 = torch.operator "onnx.LayerNormalization"(%4503, %4567, %4568) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4570 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// Multiply the layer-norm output %4569 by (unsqueezed %4554 + scalar constant),
// then add the unsqueezed %4551.
// The 0-d constant is typed `tensor<bf16>` to agree with its declared
// `!torch.vtensor<[],bf16>` result (a bare `tensor` does not parse).
%4571 = torch.operator "onnx.Unsqueeze"(%4554, %4570) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%4572 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%4573 = torch.operator "onnx.Add"(%4571, %4572) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%4574 = torch.operator "onnx.Mul"(%4569, %4573) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4575 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4576 = torch.operator "onnx.Unsqueeze"(%4551, %4575) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%4577 = torch.operator "onnx.Add"(%4574, %4576) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Gemm of %1285 with transposed weight %176 and bias %177 yields a [1,18432]
// tensor; the following ops read one entry of its shape via Gather.
%4578 = torch.operator "onnx.Gemm"(%1285, %176, %177) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16>
%4579 = torch.operator "onnx.Shape"(%4578) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64>
%4580 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4581 = torch.operator "onnx.Gather"(%4579, %4580) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) ->
!torch.vtensor<[1],si64> %4582 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4583 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4584 = torch.operator "onnx.Add"(%4581, %4583) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4585 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4586 = torch.operator "onnx.Div"(%4584, %4585) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4587 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4588 = torch.operator "onnx.Mul"(%4586, %4587) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4589 = torch.operator "onnx.Slice"(%4578, %4582, %4588, %4580) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4590 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4591 = torch.operator "onnx.Mul"(%4586, %4590) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4592 = torch.operator "onnx.Slice"(%4578, %4588, %4591, %4580) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4593 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.8_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4594 = torch.operator "onnx.Mul"(%4586, %4593) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4595 = torch.operator "onnx.Slice"(%4578, %4591, %4594, %4580) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4596 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4597 = torch.operator "onnx.Mul"(%4586, %4596) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4598 = torch.operator "onnx.Slice"(%4578, %4594, %4597, %4580) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4599 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4600 = torch.operator "onnx.Mul"(%4586, %4599) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4601 = torch.operator "onnx.Slice"(%4578, %4597, %4600, %4580) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4602 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4603 = torch.operator "onnx.Mul"(%4586, %4602) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4604 = torch.operator "onnx.Slice"(%4578, %4600, %4603, %4580) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// Load the two [3072] bf16 globals used as LayerNormalization scale/bias inputs,
// normalise %4539 over the last axis, then multiply by (unsqueezed %4592 + scalar).
%_2Ftransformer_blocks.82Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.8/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16>
%4605 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.82Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.82Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.8/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16>
%4606 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.82Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%4607 = torch.operator "onnx.LayerNormalization"(%4539, %4605, %4606) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4608 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4609 = torch.operator "onnx.Unsqueeze"(%4592, %4608) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// The 0-d constant is typed `tensor<bf16>` to agree with its declared
// `!torch.vtensor<[],bf16>` result (a bare `tensor` does not parse).
%4610 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%4611 = torch.operator "onnx.Add"(%4609, %4610) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%4612 = torch.operator "onnx.Mul"(%4607, %4611) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4614 =
torch.operator "onnx.Unsqueeze"(%4589, %4613) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Add the unsqueezed %4589 to %4612, then gather one entry of the result's shape.
// 0-d index constants are typed `tensor<si64>` to agree with their declared
// `!torch.vtensor<[],si64>` results (a bare `tensor` does not parse).
%4615 = torch.operator "onnx.Add"(%4612, %4614) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%4616 = torch.operator "onnx.Shape"(%4615) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%4617 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%4618 = torch.operator "onnx.Gather"(%4616, %4617) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three 3072x3072 projections of %4577 (weights %837/%838/%839), each with a [3072] bias.
%4619 = torch.operator "onnx.MatMul"(%4577, %837) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4620 = torch.operator "onnx.Add"(%180, %4619) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4621 = torch.operator "onnx.MatMul"(%4577, %838) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4622 = torch.operator "onnx.Add"(%181, %4621) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4623 = torch.operator "onnx.MatMul"(%4577, %839) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4624 = torch.operator "onnx.Add"(%182, %4623) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%4625 = torch.operator "onnx.Shape"(%4622) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
%4626 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%4627 = torch.operator "onnx.Gather"(%4625, %4626)
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Divide the gathered dimension %4627 by a scalar constant; the two Casts are
// si64 -> si64 (to = 7) and leave the value's type unchanged.
// The 0-d divisor is typed `tensor<si64>` to agree with its declared
// `!torch.vtensor<[],si64>` result (a bare `tensor` does not parse).
%4628 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%4629 = torch.operator "onnx.Div"(%4627, %4628) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%4630 = torch.operator "onnx.Cast"(%4629) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%4631 = torch.operator "onnx.Cast"(%4630) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Assemble a 4-element shape vector: [%4618, const, const, %4631].
%4632 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9174_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4633 = torch.operator "onnx.Unsqueeze"(%4618, %4632) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4634 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4635 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4636 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9178_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4637 = torch.operator "onnx.Unsqueeze"(%4631, %4636) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4638 = torch.operator "onnx.Concat"(%4633, %4634, %4635, %4637) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%4639 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9181_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4640 =
torch.operator "onnx.Unsqueeze"(%4618, %4639) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4641 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4642 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4643 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9185_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4644 = torch.operator "onnx.Unsqueeze"(%4631, %4643) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4645 = torch.operator "onnx.Concat"(%4640, %4641, %4642, %4644) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %4646 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9188_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4647 = torch.operator "onnx.Unsqueeze"(%4618, %4646) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4648 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4649 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4650 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9192_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4651 = torch.operator "onnx.Unsqueeze"(%4631, %4650) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4652 = torch.operator "onnx.Concat"(%4647, %4648, 
%4649, %4651) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %4653 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9195_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4654 = torch.operator "onnx.Unsqueeze"(%4618, %4653) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4655 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4656 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4657 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9199_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4658 = torch.operator "onnx.Unsqueeze"(%4631, %4657) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4659 = torch.operator "onnx.Concat"(%4654, %4655, %4656, %4658) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %4660 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9202_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4661 = torch.operator "onnx.Unsqueeze"(%4618, %4660) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4662 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4663 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%4664 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9206_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4665 = torch.operator "onnx.Unsqueeze"(%4631, %4664) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4666 = torch.operator "onnx.Concat"(%4661, %4662, %4663, %4665) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %4667 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9209_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4668 = torch.operator "onnx.Unsqueeze"(%4618, %4667) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4669 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4670 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4671 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9213_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4672 = torch.operator "onnx.Unsqueeze"(%4631, %4671) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4673 = torch.operator "onnx.Concat"(%4668, %4669, %4670, %4672) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %4674 = torch.operator "onnx.Reshape"(%4620, %4638) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4675 = torch.operator "onnx.Transpose"(%4674) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Reshape %4622 and %4624 with the 4-element shape vectors %4645 / %4652 and
// swap axes 1 and 2 (perm = [0, 2, 1, 3]).
%4676 = torch.operator "onnx.Reshape"(%4622, %4645) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4677 = torch.operator "onnx.Transpose"(%4676) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%4678 = torch.operator "onnx.Reshape"(%4624, %4652) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4679 = torch.operator "onnx.Transpose"(%4678) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// On %4675 in f32: Pow by a scalar, mean over the last axis (keepdims), add a
// scalar, Sqrt.
// NOTE(review): has the shape of an RMS normalisation; exponent and epsilon sit
// in opaque dense_resource blobs, so confirm against the exporter.
// 0-d constants are typed `tensor<f32>` to agree with their declared
// `!torch.vtensor<[],f32>` results (a bare `tensor` does not parse).
%4680 = torch.operator "onnx.Cast"(%4675) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4681 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4682 = torch.operator "onnx.Pow"(%4680, %4681) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4683 = torch.operator "onnx.ReduceMean"(%4682) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4684 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4685 = torch.operator "onnx.Add"(%4683, %4684) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4686 = torch.operator "onnx.Sqrt"(%4685) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4687 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () ->
!torch.vtensor<[],f32>
// Scale the f32 copy of %4675 by (scalar / %4686), cast back to bf16 (to = 16)
// and multiply by the [128] weight %178.
%4688 = torch.operator "onnx.Div"(%4687, %4686) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4689 = torch.operator "onnx.Cast"(%4675) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4690 = torch.operator "onnx.Mul"(%4689, %4688) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4691 = torch.operator "onnx.Cast"(%4690) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%4692 = torch.operator "onnx.Mul"(%4691, %178) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Same op sequence applied to %4677.
// NOTE(review): has the shape of an RMS normalisation; the scalar values sit in
// opaque dense_resource blobs, so confirm against the exporter.
// 0-d constants are typed `tensor<f32>` to agree with their declared
// `!torch.vtensor<[],f32>` results (a bare `tensor` does not parse).
%4693 = torch.operator "onnx.Cast"(%4677) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4694 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4695 = torch.operator "onnx.Pow"(%4693, %4694) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4696 = torch.operator "onnx.ReduceMean"(%4695) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4697 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4698 = torch.operator "onnx.Add"(%4696, %4697) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4699 = torch.operator "onnx.Sqrt"(%4698) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4700 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4701 = torch.operator "onnx.Div"(%4700,
%4699) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %4702 = torch.operator "onnx.Cast"(%4677) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %4703 = torch.operator "onnx.Mul"(%4702, %4701) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %4704 = torch.operator "onnx.Cast"(%4703) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %4705 = torch.operator "onnx.Mul"(%4704, %179) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %4706 = torch.operator "onnx.MatMul"(%4615, %840) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4707 = torch.operator "onnx.Add"(%185, %4706) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4708 = torch.operator "onnx.MatMul"(%4615, %841) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4709 = torch.operator "onnx.Add"(%183, %4708) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4710 = torch.operator "onnx.MatMul"(%4615, %842) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4711 = torch.operator "onnx.Add"(%184, %4710) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4712 = torch.operator "onnx.Reshape"(%4707, %4659) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4713 = torch.operator "onnx.Transpose"(%4712) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %4714 = torch.operator 
"onnx.Reshape"(%4709, %4666) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// Transpose the reshaped tensors with perm = [0, 2, 1, 3]; %4711 is reshaped
// with the shape vector %4673 first.
%4715 = torch.operator "onnx.Transpose"(%4714) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%4716 = torch.operator "onnx.Reshape"(%4711, %4673) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%4717 = torch.operator "onnx.Transpose"(%4716) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// On %4713 in f32: Pow by a scalar, mean over the last axis (keepdims), add a
// scalar, Sqrt, then scalar / result.
// NOTE(review): has the shape of an RMS normalisation; the scalar values sit in
// opaque dense_resource blobs, so confirm against the exporter.
// 0-d constants are typed `tensor<f32>` to agree with their declared
// `!torch.vtensor<[],f32>` results (a bare `tensor` does not parse).
%4718 = torch.operator "onnx.Cast"(%4713) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4719 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4720 = torch.operator "onnx.Pow"(%4718, %4719) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4721 = torch.operator "onnx.ReduceMean"(%4720) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4722 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4723 = torch.operator "onnx.Add"(%4721, %4722) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4724 = torch.operator "onnx.Sqrt"(%4723) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4725 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4726 = torch.operator "onnx.Div"(%4725, %4724) :
(!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Scale the f32 copy of %4713 by %4726, cast back to bf16 (to = 16) and
// multiply by the [128] weight %188.
%4727 = torch.operator "onnx.Cast"(%4713) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4728 = torch.operator "onnx.Mul"(%4727, %4726) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4729 = torch.operator "onnx.Cast"(%4728) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%4730 = torch.operator "onnx.Mul"(%4729, %188) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Same op sequence applied to %4715.
// NOTE(review): has the shape of an RMS normalisation; the scalar values sit in
// opaque dense_resource blobs, so confirm against the exporter.
// 0-d constants are typed `tensor<f32>` to agree with their declared
// `!torch.vtensor<[],f32>` results (a bare `tensor` does not parse).
%4731 = torch.operator "onnx.Cast"(%4715) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%4732 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4733 = torch.operator "onnx.Pow"(%4731, %4732) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%4734 = torch.operator "onnx.ReduceMean"(%4733) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4735 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4736 = torch.operator "onnx.Add"(%4734, %4735) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4737 = torch.operator "onnx.Sqrt"(%4736) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%4738 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%4739 = torch.operator "onnx.Div"(%4738, %4737) : (!torch.vtensor<[],f32>,
// The next line first closes the signature of %4739 = Div(%4738, %4737), then finishes the second normalisation:
//   %4743 = bf16( f32(%4715) * %4739 ) * %189            (%189: [128] bf16 vector)
// Three pairs of tensors are then joined along axis 2:
//   %4744 = Concat(%4730, %4692)   %4745 = Concat(%4743, %4705)   %4746 = Concat(%4717, %4679)
// Further down, %4744 and %4745 feed the two operands of the first MatMul (%4852) and %4746 is the right operand
// of the second MatMul (%4854).
// Finally three scalar entries of Shape(%4744) start being extracted (Shape is recomputed for every Gather; the
// Gather indices are scalar constants whose values are not visible here).
!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %4740 = torch.operator "onnx.Cast"(%4715) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %4741 = torch.operator "onnx.Mul"(%4740, %4739) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %4742 = torch.operator "onnx.Cast"(%4741) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %4743 = torch.operator "onnx.Mul"(%4742, %189) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %4744 = torch.operator "onnx.Concat"(%4730, %4692) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %4745 = torch.operator "onnx.Concat"(%4743, %4705) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %4746 = torch.operator "onnx.Concat"(%4717, %4679) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %4747 = torch.operator "onnx.Shape"(%4744) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %4748 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_15_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4749 = torch.operator "onnx.Gather"(%4747, %4748) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4750 = torch.operator "onnx.Shape"(%4744) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %4751 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_16_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4752 = torch.operator "onnx.Gather"(%4750, %4751) {torch.onnx.axis = 0 : si64} : 
// Builds the 5-element target shape %4764 used to reshape %4744:
//   three scalars gathered from Shape(%4744) (%4749, %4752, %4755), each Unsqueezed to [1], followed by two
//   [1]-shaped constants (attn_Constant_18 / _19), all concatenated along axis 0.
// NOTE(review): the two trailing constants are not visible; the Reshape result type on the following line
//   ([?,?,?,?,2]) shows the last one yields a trailing dim of 2 -- the fourth is presumably -1 or 64, confirm.
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4753 = torch.operator "onnx.Shape"(%4744) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %4754 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_17_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4755 = torch.operator "onnx.Gather"(%4753, %4754) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4756 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9298_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4757 = torch.operator "onnx.Unsqueeze"(%4749, %4756) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4758 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9300_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4759 = torch.operator "onnx.Unsqueeze"(%4752, %4758) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4760 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9302_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4761 = torch.operator "onnx.Unsqueeze"(%4755, %4760) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4762 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4763 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4764 = torch.operator "onnx.Concat"(%4757, %4759, %4761, %4762, %4763) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
// Pairwise split of %4744 (the next line first closes the signature of the %4764 Concat):
//   %4765 = Reshape(%4744, %4764)            -> [..., 2]
//   %4767#0 / %4767#1 = Split(axis = -1)     -> two [..., 1] halves, Squeezed to %4769 (a) and %4771 (b)
//   %4772 = Neg(b); %4774 = Unsqueeze(-b); %4776 = Unsqueeze(a)
// i.e. it prepares the pair (-b, a) that is concatenated on the following line.
// NOTE(review): Split sizes and Squeeze/Unsqueeze axes are constants not visible here; the result types show a
//   trailing unit dim being removed and re-added.
!torch.vtensor<[5],si64> %4765 = torch.operator "onnx.Reshape"(%4744, %4764) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %4766 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %4767:2 = torch.operator "onnx.Split"(%4765, %4766) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %4768 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4769 = torch.operator "onnx.Squeeze"(%4767#0, %4768) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4770 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4771 = torch.operator "onnx.Squeeze"(%4767#1, %4770) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4772 = torch.operator "onnx.Neg"(%4771) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %4773 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4774 = torch.operator "onnx.Unsqueeze"(%4772, %4773) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %4775 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4776 = torch.operator "onnx.Unsqueeze"(%4769, %4775) : (!torch.vtensor<[?,?,?,?],bf16>, 
// %4777 = Concat(%4774, %4776, axis = -1) stores the pair (-b, a) in a trailing dim of 2.
// It is then flattened back to rank 4: a 3-element slice of Shape(%4777) (ONNX Slice operand order is
// data, starts, ends, axes -> starts=%4780, ends=%4781, axes=%4779) is concatenated with one more [1] constant
// and used as the Reshape target, producing %4785.
// %4787 = f32(%4744) * %1455, where %1455 is a [1,1,4608,128] f32 tensor defined outside this chunk.
// NOTE(review): %1455 / %1456 are presumably the cos / sin rotary tables -- verify at their definition.
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %4777 = torch.operator "onnx.Concat"(%4774, %4776) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %4778 = torch.operator "onnx.Shape"(%4777) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %4779 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4780 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4781 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4782 = torch.operator "onnx.Slice"(%4778, %4780, %4781, %4779) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %4783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4784 = torch.operator "onnx.Concat"(%4782, %4783) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %4785 = torch.operator "onnx.Reshape"(%4777, %4784) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4786 = torch.operator "onnx.Cast"(%4744) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %4787 = torch.operator "onnx.Mul"(%4786, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4788 = torch.operator "onnx.Cast"(%4785) {torch.onnx.to = 1 : si64} : 
// Combines the two products in f32 and rounds once:
//   %4791 = bf16( f32(%4744) * %1455 + f32(%4785) * %1456 )      -> [?,?,4608,128]
// Broadcasting against the [1,1,4608,128] tables is what pins dim 2 to 4608 in the result types.
// The rest of the line starts the identical procedure for %4745: three Shape(%4745) / Gather pairs
// (scalar indices from attn_Constant_29 / _30 / _31, values not visible).
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %4789 = torch.operator "onnx.Mul"(%4788, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4790 = torch.operator "onnx.Add"(%4787, %4789) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4791 = torch.operator "onnx.Cast"(%4790) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %4792 = torch.operator "onnx.Shape"(%4745) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %4793 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_29_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4794 = torch.operator "onnx.Gather"(%4792, %4793) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4795 = torch.operator "onnx.Shape"(%4745) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %4796 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_30_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4797 = torch.operator "onnx.Gather"(%4795, %4796) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4798 = torch.operator "onnx.Shape"(%4745) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %4799 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_31_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4800 = torch.operator "onnx.Gather"(%4798, %4799) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4801 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9343_attr__value> : tensor<1xsi64>} : () -> 
// Same pairing procedure as applied to %4744, now for %4745:
//   %4809 = Concat of three Unsqueezed Shape(%4745) scalars plus two [1] constants (attn_Constant_32 / _33)
//   %4810 = Reshape(%4745, %4809)            -> [..., 2]
//   %4812#0 / %4812#1 = Split(axis = -1)     -> two [..., 1] halves
!torch.vtensor<[1],si64> %4802 = torch.operator "onnx.Unsqueeze"(%4794, %4801) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4803 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9345_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4804 = torch.operator "onnx.Unsqueeze"(%4797, %4803) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4805 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9347_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4806 = torch.operator "onnx.Unsqueeze"(%4800, %4805) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4807 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4808 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4809 = torch.operator "onnx.Concat"(%4802, %4804, %4806, %4807, %4808) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %4810 = torch.operator "onnx.Reshape"(%4745, %4809) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %4811 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %4812:2 = torch.operator "onnx.Split"(%4810, %4811) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %4813 = torch.operator "onnx.Constant"() 
// Continues the %4745 pairing: a = Squeeze(%4812#0) = %4814, b = Squeeze(%4812#1) = %4816,
//   %4822 = Concat( Unsqueeze(Neg(b)), Unsqueeze(a), axis = -1 )      -> (-b, a) pairs, trailing dim 2
// Shape(%4822) and the first Slice index constants for flattening back to rank 4 follow.
{torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4814 = torch.operator "onnx.Squeeze"(%4812#0, %4813) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4815 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4816 = torch.operator "onnx.Squeeze"(%4812#1, %4815) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4817 = torch.operator "onnx.Neg"(%4816) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %4818 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4819 = torch.operator "onnx.Unsqueeze"(%4817, %4818) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %4820 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4821 = torch.operator "onnx.Unsqueeze"(%4814, %4820) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %4822 = torch.operator "onnx.Concat"(%4819, %4821) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %4823 = torch.operator "onnx.Shape"(%4822) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %4824 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4825 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_40_attr__value> : 
// Finishes the %4745 branch: %4830 = Reshape(%4822) back to rank 4, then
//   %4836 = bf16( f32(%4745) * %1455 + f32(%4830) * %1456 )      -> [?,?,4608,128]
// (same %1455 / %1456 tables as used for %4791). The line ends by taking Shape(%4791) for the scale computation.
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4826 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4827 = torch.operator "onnx.Slice"(%4823, %4825, %4826, %4824) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %4828 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4829 = torch.operator "onnx.Concat"(%4827, %4828) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %4830 = torch.operator "onnx.Reshape"(%4822, %4829) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %4831 = torch.operator "onnx.Cast"(%4745) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %4832 = torch.operator "onnx.Mul"(%4831, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4833 = torch.operator "onnx.Cast"(%4830) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %4834 = torch.operator "onnx.Mul"(%4833, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4835 = torch.operator "onnx.Add"(%4832, %4834) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %4836 = torch.operator "onnx.Cast"(%4835) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %4837 = torch.operator "onnx.Shape"(%4791) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %4838 = torch.operator 
// Scale factor: one element of Shape(%4791) is sliced out, cast to bf16 and square-rooted; the f32 constant
// attn_Constant_45 is divided by it and the quotient is cast to bf16 (%4846).
// NOTE(review): Slice bounds are not visible; presumably this selects the last dim (128), making %4846 = c/sqrt(128).
// The scale is applied symmetrically: both operands are multiplied by Sqrt(%4846) before the product:
//   %4849 = %4791 * sqrt(%4846)          %4851 = Transpose(%4836, perm [0,1,3,2]) * sqrt(%4846)
// NOTE(review): %4848 and %4850 are the same Sqrt(%4846) emitted twice; duplicate left as exported.
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4839 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4840 = torch.operator "onnx.Slice"(%4837, %4838, %4839) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4841 = torch.operator "onnx.Cast"(%4840) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %4842 = torch.operator "onnx.Sqrt"(%4841) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %4843 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %4844 = torch.operator "onnx.Cast"(%4842) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %4845 = torch.operator "onnx.Div"(%4843, %4844) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %4846 = torch.operator "onnx.Cast"(%4845) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %4847 = torch.operator "onnx.Transpose"(%4836) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %4848 = torch.operator "onnx.Sqrt"(%4846) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %4849 = torch.operator "onnx.Mul"(%4791, %4848) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %4850 = torch.operator "onnx.Sqrt"(%4846) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %4851 = torch.operator "onnx.Mul"(%4847, %4850) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %4852 = torch.operator 
// Attention core, all in bf16:
//   %4852 = MatMul(%4849, %4851)            -> [?,?,4608,4608] score matrix (fully materialised)
//   %4853 = Softmax(%4852, axis = -1)
//   %4854 = MatMul(%4853, %4746)            -> [?,?,4608,?]
//   %4855 = Transpose(%4854, perm [0,2,1,3]) -> [?,4608,?,?]
// A 3-element target shape %4863 = [ %4618, attn_Constant_47, %4631 * attn_Constant_46 ] is then assembled for the
// Reshape of %4855 that starts at the end of this line (%4618 / %4631 are scalars defined before this chunk).
"onnx.MatMul"(%4849, %4851) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %4853 = torch.operator "onnx.Softmax"(%4852) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %4854 = torch.operator "onnx.MatMul"(%4853, %4746) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %4855 = torch.operator "onnx.Transpose"(%4854) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %4856 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_46_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4857 = torch.operator "onnx.Mul"(%4631, %4856) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4858 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9400_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4859 = torch.operator "onnx.Unsqueeze"(%4618, %4858) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4860 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4861 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9403_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4862 = torch.operator "onnx.Unsqueeze"(%4857, %4861) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4863 = torch.operator "onnx.Concat"(%4859, %4860, %4862) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %4864 = torch.operator "onnx.Reshape"(%4855, %4863) {torch.onnx.allowzero = 0 : si64} : 
// The next line first closes the signature of %4864 = Reshape(%4855, %4863) (rank 4 -> rank 3).
// %4865 = Cast(to = 16) has bf16 as both operand and result element type, so it is an identity per its signature.
// %4868 = one entry of Shape(%4615); %4615 is typed [?,512,3072]. The Gather index is not visible.
// %4874 = Slice(%4865, starts=%4870, ends=[%4868], axes=%4869, steps=%4873): the part of the attention output
//   that ends at position %4868.
// NOTE(review): presumably %4868 = 512 and the slice is along axis 1, i.e. the first 512 of the 4608 rows --
//   confirm against attn_Constant_48..52.
(!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %4865 = torch.operator "onnx.Cast"(%4864) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %4866 = torch.operator "onnx.Shape"(%4615) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %4867 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %4868 = torch.operator "onnx.Gather"(%4866, %4867) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %4869 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4870 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4871 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4872 = torch.operator "onnx.Unsqueeze"(%4868, %4871) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4873 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4874 = torch.operator "onnx.Slice"(%4865, %4870, %4872, %4869, %4873) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %4875 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4876 = torch.operator "onnx.Constant"() 
// %4880 = Slice(%4865, starts=[%4868], ends=%4878, axes=%4875, steps=%4879): the complementary part of the
//   attention output, starting where %4874 stopped.
// Each part gets its own 3072x3072 projection plus [3072] bias:
//   %4882 = %186 + MatMul(%4880, %843)          %4884 = %187 + MatMul(%4874, %844)
// %4882 is gated and added to the residual right away: %4887 = Unsqueeze(%4557) * %4882 (gate broadcast as
//   [?,1,?]), and the line ends by starting %4888 = %4503 + %4887.
// %4884 is consumed later by %4923.
{torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4877 = torch.operator "onnx.Unsqueeze"(%4868, %4876) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4878 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4879 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4880 = torch.operator "onnx.Slice"(%4865, %4877, %4878, %4875, %4879) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %4881 = torch.operator "onnx.MatMul"(%4880, %843) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4882 = torch.operator "onnx.Add"(%186, %4881) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4883 = torch.operator "onnx.MatMul"(%4874, %844) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4884 = torch.operator "onnx.Add"(%187, %4883) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4885 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4886 = torch.operator "onnx.Unsqueeze"(%4557, %4885) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4887 = torch.operator "onnx.Mul"(%4886, %4882) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %4888 = torch.operator "onnx.Add"(%4503, %4887) : 
// --- transformer_blocks.8 / norm2 on the [?,4096,3072] tensor ---
// The next line first closes %4888 = %4503 + %4887 (the post-attention residual sum).
// %4889 / %4890 are the LayerNormalization scale and bias operands, loaded from util.globals named
//   ".../norm2/Constant_attr__value" and ".../norm2/Constant_1_attr__value" (both tensor<3072xbf16>).
// %4891 = LayerNormalization(%4888, %4889, %4890), axis = -1, epsilon = 9.99999997E-7 (f32 rendering of 1e-6).
// %4896 = %4891 * ( Unsqueeze(%4563) + %4894 ), where %4894 is a scalar bf16 constant.
// NOTE(review): %4894 is presumably 1.0 (the "1 + scale" modulation form) -- the value is not visible here.
// The shift term Unsqueeze(%4560) is started at the end of the line and added on the next one.
(!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.82Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.8/norm2/Constant_attr__value" : tensor<3072xbf16> %4889 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.82Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.82Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.8/norm2/Constant_1_attr__value" : tensor<3072xbf16> %4890 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.82Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %4891 = torch.operator "onnx.LayerNormalization"(%4888, %4889, %4890) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4892 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4893 = torch.operator "onnx.Unsqueeze"(%4563, %4892) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4894 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4895 = torch.operator "onnx.Add"(%4893, %4894) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %4896 = torch.operator "onnx.Mul"(%4891, %4895) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4897 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4898 = torch.operator "onnx.Unsqueeze"(%4560, %4897) : (!torch.vtensor<[?,?],bf16>, 
// --- transformer_blocks.8 / ff.net on the [?,4096,3072] tensor ---
// %4899 = %4896 + Unsqueeze(%4560)                     (shift term of the modulation)
// x = %4901 = %190 + MatMul(%4899, %845)               (3072 -> 12288, bias [12288])
// Activation, written out elementwise in bf16 with four scalar constants c0..c3 (ff_net.0_Constant .. _3):
//   %4903 = x * (x * x)
//   %4906 = x + c0 * %4903
//   %4909 = Tanh(c1 * %4906)
// NOTE(review): the op structure is that of tanh-approximated GELU, c3 * x * (c2 + tanh(c1 * (x + c0 * x^3)));
//   the constant values themselves are in dense_resource blobs not visible here -- confirm.
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4899 = torch.operator "onnx.Add"(%4896, %4898) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4900 = torch.operator "onnx.MatMul"(%4899, %845) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4901 = torch.operator "onnx.Add"(%190, %4900) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4902 = torch.operator "onnx.Mul"(%4901, %4901) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4903 = torch.operator "onnx.Mul"(%4901, %4902) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4904 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_ff_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4905 = torch.operator "onnx.Mul"(%4904, %4903) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4906 = torch.operator "onnx.Add"(%4901, %4905) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4907 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4908 = torch.operator "onnx.Mul"(%4907, %4906) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4909 = torch.operator "onnx.Tanh"(%4908) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4910 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4911 = torch.operator 
// Rest of the activation and the down-projection:
//   %4914 = c3 * ( x * (c2 + %4909) )
//   %4916 = %191 + MatMul(%4914, %846)                 (12288 -> 3072, bias [3072])
//   %4920 = %4888 + Unsqueeze(%4566) * %4916           (gated residual; result for the [?,4096,3072] tensor)
// The line then switches to the [?,512,3072] tensor: %4923 = Unsqueeze(%4595) * %4884 gates the second attention
// projection (its signature finishes on the following line).
"onnx.Add"(%4910, %4909) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4912 = torch.operator "onnx.Mul"(%4901, %4911) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4913 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4914 = torch.operator "onnx.Mul"(%4913, %4912) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %4915 = torch.operator "onnx.MatMul"(%4914, %846) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4916 = torch.operator "onnx.Add"(%191, %4915) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4917 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4918 = torch.operator "onnx.Unsqueeze"(%4566, %4917) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4919 = torch.operator "onnx.Mul"(%4918, %4916) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4920 = torch.operator "onnx.Add"(%4888, %4919) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4921 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4922 = torch.operator "onnx.Unsqueeze"(%4595, %4921) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4923 = torch.operator "onnx.Mul"(%4922, %4884) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) 
// --- transformer_blocks.8 / norm2_context on the [?,512,3072] tensor ---
// %4924 = %4539 + %4923                                 (post-attention residual sum)
// %4925 / %4926: LayerNormalization scale and bias, loaded from the ".../norm2_context/Constant_attr__value" and
//   ".../norm2_context/Constant_1_attr__value" globals (tensor<3072xbf16>).
// %4927 = LayerNormalization(%4924, %4925, %4926), axis = -1, epsilon = 9.99999997E-7.
// %4932 = %4927 * ( Unsqueeze(%4601) + %4930 ), %4930 being a scalar bf16 constant.
// NOTE(review): %4930 is presumably 1.0 -- value not visible here.
-> !torch.vtensor<[?,?,3072],bf16> %4924 = torch.operator "onnx.Add"(%4539, %4923) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.82Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.8/norm2_context/Constant_attr__value" : tensor<3072xbf16> %4925 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.82Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.82Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.8/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %4926 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.82Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %4927 = torch.operator "onnx.LayerNormalization"(%4924, %4925, %4926) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4928 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4929 = torch.operator "onnx.Unsqueeze"(%4601, %4928) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4930 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4931 = torch.operator "onnx.Add"(%4929, %4930) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %4932 = torch.operator "onnx.Mul"(%4927, %4931) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4933 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_8_attr__value> : 
// --- transformer_blocks.8 / ff_context.net on the [?,512,3072] tensor ---
// %4935 = %4932 + Unsqueeze(%4598)                      (shift term of the modulation)
// y = %4937 = %192 + MatMul(%4935, %847)                (3072 -> 12288, bias [12288])
// Same elementwise activation sequence as the ff.net branch, with its own scalar constants
// (ff_context_net.0_Constant .. _3):
//   %4939 = y * (y * y);  %4942 = y + c0 * %4939;  %4945 = Tanh(c1 * %4942)
// NOTE(review): constant values are not visible here; structure matches tanh-approximated GELU -- confirm.
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4934 = torch.operator "onnx.Unsqueeze"(%4598, %4933) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4935 = torch.operator "onnx.Add"(%4932, %4934) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4936 = torch.operator "onnx.MatMul"(%4935, %847) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4937 = torch.operator "onnx.Add"(%192, %4936) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4938 = torch.operator "onnx.Mul"(%4937, %4937) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4939 = torch.operator "onnx.Mul"(%4937, %4938) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4940 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4941 = torch.operator "onnx.Mul"(%4940, %4939) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4942 = torch.operator "onnx.Add"(%4937, %4941) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4943 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4944 = torch.operator "onnx.Mul"(%4943, %4942) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4945 = torch.operator "onnx.Tanh"(%4944) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4946 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.8_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4947 = torch.operator "onnx.Add"(%4946, %4945) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4948 = torch.operator "onnx.Mul"(%4937, %4947) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %4950 = torch.operator "onnx.Mul"(%4949, %4948) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %4951 = torch.operator "onnx.MatMul"(%4950, %848) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4952 = torch.operator "onnx.Add"(%193, %4951) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4953 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.8_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4954 = torch.operator "onnx.Unsqueeze"(%4604, %4953) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4955 = torch.operator "onnx.Mul"(%4954, %4952) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4956 = torch.operator "onnx.Add"(%4924, %4955) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %4957 = torch.operator "onnx.Gemm"(%1285, %194, %195) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> 
%4958 = torch.operator "onnx.Shape"(%4957) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %4959 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4960 = torch.operator "onnx.Gather"(%4958, %4959) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4961 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4962 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4963 = torch.operator "onnx.Add"(%4960, %4962) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4964 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4965 = torch.operator "onnx.Div"(%4963, %4964) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4966 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4967 = torch.operator "onnx.Mul"(%4965, %4966) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4968 = torch.operator "onnx.Slice"(%4957, %4961, %4967, %4959) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4969 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4970 = torch.operator "onnx.Mul"(%4965, %4969) : 
(!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4971 = torch.operator "onnx.Slice"(%4957, %4967, %4970, %4959) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4972 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4973 = torch.operator "onnx.Mul"(%4965, %4972) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4974 = torch.operator "onnx.Slice"(%4957, %4970, %4973, %4959) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4975 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4976 = torch.operator "onnx.Mul"(%4965, %4975) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4977 = torch.operator "onnx.Slice"(%4957, %4973, %4976, %4959) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4979 = torch.operator "onnx.Mul"(%4965, %4978) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4980 = torch.operator "onnx.Slice"(%4957, %4976, %4979, %4959) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %4981 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64>
// %4982 = %4965 * %4981 is the end bound of the Slice below, which reads %4957
// from start %4979 along the axes given by %4959.
%4982 = torch.operator "onnx.Mul"(%4965, %4981) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%4983 = torch.operator "onnx.Slice"(%4957, %4979, %4982, %4959) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// transformer_blocks.9/norm1/norm: LayerNormalization of %4920 over the last
// axis, with scale (%4984) and bias (%4985) loaded from globals.
%_2Ftransformer_blocks.92Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.9/norm1/norm/Constant_attr__value" : tensor<3072xbf16>
%4984 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.92Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.92Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.9/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
%4985 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.92Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%4986 = torch.operator "onnx.LayerNormalization"(%4920, %4984, %4985) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Scale factor for the normalized tensor: unsqueeze(%4971) + c, where the
// scalar c is an external resource (value not visible here).
%4987 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%4988 = torch.operator "onnx.Unsqueeze"(%4971, %4987) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constant: the attribute carries an explicit element type
// (tensor<bf16>), matching the !torch.vtensor<[],bf16> result type.
%4989 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%4990 = torch.operator "onnx.Add"(%4988, %4989) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%4991 = torch.operator "onnx.Mul"(%4986, %4990) : (!torch.vtensor<[?,4096,3072],bf16>,
!torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4992 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4993 = torch.operator "onnx.Unsqueeze"(%4968, %4992) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %4994 = torch.operator "onnx.Add"(%4991, %4993) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %4995 = torch.operator "onnx.Gemm"(%1285, %196, %197) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %4996 = torch.operator "onnx.Shape"(%4995) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %4997 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %4998 = torch.operator "onnx.Gather"(%4996, %4997) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %4999 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5000 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5001 = torch.operator "onnx.Add"(%4998, %5000) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5002 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5003 = 
torch.operator "onnx.Div"(%5001, %5002) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5004 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5005 = torch.operator "onnx.Mul"(%5003, %5004) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5006 = torch.operator "onnx.Slice"(%4995, %4999, %5005, %4997) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5007 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5008 = torch.operator "onnx.Mul"(%5003, %5007) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5009 = torch.operator "onnx.Slice"(%4995, %5005, %5008, %4997) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5010 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5011 = torch.operator "onnx.Mul"(%5003, %5010) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5012 = torch.operator "onnx.Slice"(%4995, %5008, %5011, %4997) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5013 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5014 = torch.operator "onnx.Mul"(%5003, %5013) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %5015 = torch.operator "onnx.Slice"(%4995, %5011, %5014, %4997) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5016 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5017 = torch.operator "onnx.Mul"(%5003, %5016) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5018 = torch.operator "onnx.Slice"(%4995, %5014, %5017, %4997) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5019 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5020 = torch.operator "onnx.Mul"(%5003, %5019) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5021 = torch.operator "onnx.Slice"(%4995, %5017, %5020, %4997) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.92Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.9/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %5022 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.92Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.92Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.9/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %5023 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.92Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %5024 = torch.operator "onnx.LayerNormalization"(%4956, 
%5022, %5023) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Modulation of the LayerNormalization result %5024 (constants are named
// transformer_blocks.9 norm1_context):
//   %5032 = %5024 * (unsqueeze(%5009) + c) + unsqueeze(%5006)
// The scalar c and the unsqueeze axes are external resources; their values are
// not visible in this file.
%5025 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%5026 = torch.operator "onnx.Unsqueeze"(%5009, %5025) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constants carry an explicit element type on the attribute
// (tensor<bf16> / tensor<si64>), matching the op's result type.
%5027 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%5028 = torch.operator "onnx.Add"(%5026, %5027) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%5029 = torch.operator "onnx.Mul"(%5024, %5028) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%5030 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%5031 = torch.operator "onnx.Unsqueeze"(%5006, %5030) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%5032 = torch.operator "onnx.Add"(%5029, %5031) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// %5035: one entry of shape(%5032), selected by the rank-0 index %5034
// (index value is an external resource).
%5033 = torch.operator "onnx.Shape"(%5032) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%5034 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%5035 = torch.operator "onnx.Gather"(%5033, %5034) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three projections of %4994: MatMul with %849 / %850 / %851, each followed by
// a bias Add (%200 / %201 / %202).
%5036 = torch.operator "onnx.MatMul"(%4994, %849) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%5037 = torch.operator "onnx.Add"(%200, %5036) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%5038 = torch.operator "onnx.MatMul"(%4994, %850) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%5039 = torch.operator "onnx.Add"(%201, %5038) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%5040 = torch.operator "onnx.MatMul"(%4994, %851) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%5041 = torch.operator "onnx.Add"(%202, %5040) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// %5048: one entry of shape(%5039) divided by the rank-0 constant %5045.
// Both Casts are to ONNX type 7 and are si64 -> si64.
%5042 = torch.operator "onnx.Shape"(%5039) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
%5043 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%5044 = torch.operator "onnx.Gather"(%5042, %5043) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%5045 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%5046 = torch.operator "onnx.Div"(%5044, %5045) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%5047 = torch.operator "onnx.Cast"(%5046) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%5048 = torch.operator "onnx.Cast"(%5047) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%5049 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9591_attr__value> : tensor<1xsi64>}
: () -> !torch.vtensor<[1],si64> %5050 = torch.operator "onnx.Unsqueeze"(%5035, %5049) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5051 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5052 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5053 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9595_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5054 = torch.operator "onnx.Unsqueeze"(%5048, %5053) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5055 = torch.operator "onnx.Concat"(%5050, %5051, %5052, %5054) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5056 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9598_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5057 = torch.operator "onnx.Unsqueeze"(%5035, %5056) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5058 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5059 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5060 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9602_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5061 = torch.operator "onnx.Unsqueeze"(%5048, %5060) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5062 = 
torch.operator "onnx.Concat"(%5057, %5058, %5059, %5061) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5063 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9605_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5064 = torch.operator "onnx.Unsqueeze"(%5035, %5063) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5065 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5066 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5067 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9609_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5068 = torch.operator "onnx.Unsqueeze"(%5048, %5067) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5069 = torch.operator "onnx.Concat"(%5064, %5065, %5066, %5068) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5070 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9612_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5071 = torch.operator "onnx.Unsqueeze"(%5035, %5070) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5072 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5073 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_10_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5074 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9616_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5075 = torch.operator "onnx.Unsqueeze"(%5048, %5074) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5076 = torch.operator "onnx.Concat"(%5071, %5072, %5073, %5075) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5077 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9619_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5078 = torch.operator "onnx.Unsqueeze"(%5035, %5077) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5079 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5080 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5081 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9623_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5082 = torch.operator "onnx.Unsqueeze"(%5048, %5081) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5083 = torch.operator "onnx.Concat"(%5078, %5079, %5080, %5082) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5084 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9626_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5085 = torch.operator "onnx.Unsqueeze"(%5035, %5084) : (!torch.vtensor<[],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5086 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5087 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5088 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9630_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5089 = torch.operator "onnx.Unsqueeze"(%5048, %5088) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5090 = torch.operator "onnx.Concat"(%5085, %5086, %5087, %5089) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5091 = torch.operator "onnx.Reshape"(%5037, %5055) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5092 = torch.operator "onnx.Transpose"(%5091) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5093 = torch.operator "onnx.Reshape"(%5039, %5062) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5094 = torch.operator "onnx.Transpose"(%5093) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5095 = torch.operator "onnx.Reshape"(%5041, %5069) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5096 = torch.operator "onnx.Transpose"(%5095) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],bf16>
// Normalization of %5092 (constants are named transformer_blocks.9 attn_norm_q),
// computed in f32:
//   %5107 = %5092 * (c2 / sqrt(mean(%5092 ^ c0, axis = -1, keepdims) + c1))
// then cast to bf16 (ONNX type 16) and multiplied by the 128-element tensor %198.
// NOTE(review): this is an RMS-style normalization if c0 = 2 and c2 = 1; the
// scalars are external resources, so confirm against the resource blob.
%5097 = torch.operator "onnx.Cast"(%5092) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// Rank-0 constants carry an explicit element type on the attribute
// (tensor<f32>), matching the !torch.vtensor<[],f32> result type.
%5098 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5099 = torch.operator "onnx.Pow"(%5097, %5098) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%5100 = torch.operator "onnx.ReduceMean"(%5099) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5101 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5102 = torch.operator "onnx.Add"(%5100, %5101) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5103 = torch.operator "onnx.Sqrt"(%5102) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5104 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5105 = torch.operator "onnx.Div"(%5104, %5103) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5106 = torch.operator "onnx.Cast"(%5092) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%5107 = torch.operator "onnx.Mul"(%5106, %5105) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%5108 = torch.operator "onnx.Cast"(%5107) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%5109 = torch.operator "onnx.Mul"(%5108, %198) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Same computation applied to %5094 (constants named attn_norm_k), with the
// 128-element tensor %199 as the final multiplier.
%5110 = torch.operator "onnx.Cast"(%5094) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%5111 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5112 = torch.operator "onnx.Pow"(%5110, %5111) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%5113 = torch.operator "onnx.ReduceMean"(%5112) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5114 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5115 = torch.operator "onnx.Add"(%5113, %5114) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5116 = torch.operator "onnx.Sqrt"(%5115) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5117 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5118 = torch.operator "onnx.Div"(%5117, %5116) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5119 = torch.operator "onnx.Cast"(%5094) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%5120 = torch.operator "onnx.Mul"(%5119, %5118) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%5121 = torch.operator "onnx.Cast"(%5120) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%5122 = torch.operator "onnx.Mul"(%5121, %199) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%5123 = torch.operator "onnx.MatMul"(%5032, %852) : (!torch.vtensor<[?,512,3072],bf16>,
!torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5124 = torch.operator "onnx.Add"(%205, %5123) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5125 = torch.operator "onnx.MatMul"(%5032, %853) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5126 = torch.operator "onnx.Add"(%203, %5125) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5127 = torch.operator "onnx.MatMul"(%5032, %854) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5128 = torch.operator "onnx.Add"(%204, %5127) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5129 = torch.operator "onnx.Reshape"(%5124, %5076) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5130 = torch.operator "onnx.Transpose"(%5129) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5131 = torch.operator "onnx.Reshape"(%5126, %5083) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5132 = torch.operator "onnx.Transpose"(%5131) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5133 = torch.operator "onnx.Reshape"(%5128, %5090) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5134 = torch.operator "onnx.Transpose"(%5133) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5135 = torch.operator "onnx.Cast"(%5130) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// Normalization of %5130 (constants are named transformer_blocks.9
// attn_norm_added_q), computed in f32:
//   %5145 = %5130 * (c2 / sqrt(mean(%5130 ^ c0, axis = -1, keepdims) + c1))
// then cast to bf16 (ONNX type 16) and multiplied by the 128-element tensor %208.
// NOTE(review): this is an RMS-style normalization if c0 = 2 and c2 = 1; the
// scalars are external resources, so confirm against the resource blob.
// Rank-0 constants carry an explicit element type on the attribute
// (tensor<f32>), matching the !torch.vtensor<[],f32> result type.
%5136 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5137 = torch.operator "onnx.Pow"(%5135, %5136) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%5138 = torch.operator "onnx.ReduceMean"(%5137) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5139 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5140 = torch.operator "onnx.Add"(%5138, %5139) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5141 = torch.operator "onnx.Sqrt"(%5140) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5142 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5143 = torch.operator "onnx.Div"(%5142, %5141) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5144 = torch.operator "onnx.Cast"(%5130) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%5145 = torch.operator "onnx.Mul"(%5144, %5143) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%5146 = torch.operator "onnx.Cast"(%5145) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%5147 = torch.operator "onnx.Mul"(%5146, %208) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Same computation applied to %5132 (constants named attn_norm_added_k), with
// the 128-element tensor %209 as the final multiplier.
%5148 = torch.operator "onnx.Cast"(%5132) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%5149 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5150 = torch.operator "onnx.Pow"(%5148, %5149) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%5151 = torch.operator "onnx.ReduceMean"(%5150) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5152 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5153 = torch.operator "onnx.Add"(%5151, %5152) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5154 = torch.operator "onnx.Sqrt"(%5153) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5155 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%5156 = torch.operator "onnx.Div"(%5155, %5154) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%5157 = torch.operator "onnx.Cast"(%5132) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%5158 = torch.operator "onnx.Mul"(%5157, %5156) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%5159 = torch.operator "onnx.Cast"(%5158) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%5160 = torch.operator "onnx.Mul"(%5159, %209) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenates %5147 and %5109 along axis 2.
%5161 = torch.operator "onnx.Concat"(%5147, %5109) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) ->
!torch.vtensor<[?,?,?,128],bf16> %5162 = torch.operator "onnx.Concat"(%5160, %5122) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5163 = torch.operator "onnx.Concat"(%5134, %5096) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5164 = torch.operator "onnx.Shape"(%5161) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5165 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_15_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5166 = torch.operator "onnx.Gather"(%5164, %5165) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5167 = torch.operator "onnx.Shape"(%5161) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5168 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_16_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5169 = torch.operator "onnx.Gather"(%5167, %5168) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5170 = torch.operator "onnx.Shape"(%5161) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5171 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_17_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5172 = torch.operator "onnx.Gather"(%5170, %5171) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5173 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9715_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5174 = torch.operator "onnx.Unsqueeze"(%5166, %5173) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %5175 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9717_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5176 = torch.operator "onnx.Unsqueeze"(%5169, %5175) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5177 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9719_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5178 = torch.operator "onnx.Unsqueeze"(%5172, %5177) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5180 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5181 = torch.operator "onnx.Concat"(%5174, %5176, %5178, %5179, %5180) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %5182 = torch.operator "onnx.Reshape"(%5161, %5181) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %5183 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %5184:2 = torch.operator "onnx.Split"(%5182, %5183) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %5185 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5186 = 
torch.operator "onnx.Squeeze"(%5184#0, %5185) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5187 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5188 = torch.operator "onnx.Squeeze"(%5184#1, %5187) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5189 = torch.operator "onnx.Neg"(%5188) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5190 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5191 = torch.operator "onnx.Unsqueeze"(%5189, %5190) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %5192 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5193 = torch.operator "onnx.Unsqueeze"(%5186, %5192) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %5194 = torch.operator "onnx.Concat"(%5191, %5193) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %5195 = torch.operator "onnx.Shape"(%5194) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %5196 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5197 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5198 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.9_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5199 = torch.operator "onnx.Slice"(%5195, %5197, %5198, %5196) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %5200 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5201 = torch.operator "onnx.Concat"(%5199, %5200) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5202 = torch.operator "onnx.Reshape"(%5194, %5201) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5203 = torch.operator "onnx.Cast"(%5161) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %5204 = torch.operator "onnx.Mul"(%5203, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5205 = torch.operator "onnx.Cast"(%5202) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5206 = torch.operator "onnx.Mul"(%5205, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5207 = torch.operator "onnx.Add"(%5204, %5206) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5208 = torch.operator "onnx.Cast"(%5207) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %5209 = torch.operator "onnx.Shape"(%5162) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5210 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_29_attr__value> : tensor} : () -> 
!torch.vtensor<[],si64> %5211 = torch.operator "onnx.Gather"(%5209, %5210) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5212 = torch.operator "onnx.Shape"(%5162) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5213 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_30_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5214 = torch.operator "onnx.Gather"(%5212, %5213) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5215 = torch.operator "onnx.Shape"(%5162) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5216 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_31_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5217 = torch.operator "onnx.Gather"(%5215, %5216) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5218 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9760_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5219 = torch.operator "onnx.Unsqueeze"(%5211, %5218) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5220 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9762_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5221 = torch.operator "onnx.Unsqueeze"(%5214, %5220) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5222 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9764_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5223 = torch.operator "onnx.Unsqueeze"(%5217, %5222) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5224 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.9_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5225 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5226 = torch.operator "onnx.Concat"(%5219, %5221, %5223, %5224, %5225) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %5227 = torch.operator "onnx.Reshape"(%5162, %5226) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %5228 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %5229:2 = torch.operator "onnx.Split"(%5227, %5228) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %5230 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5231 = torch.operator "onnx.Squeeze"(%5229#0, %5230) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5233 = torch.operator "onnx.Squeeze"(%5229#1, %5232) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5234 = torch.operator "onnx.Neg"(%5233) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5235 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.9_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5236 = torch.operator "onnx.Unsqueeze"(%5234, %5235) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %5237 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5238 = torch.operator "onnx.Unsqueeze"(%5231, %5237) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %5239 = torch.operator "onnx.Concat"(%5236, %5238) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %5240 = torch.operator "onnx.Shape"(%5239) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %5241 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5242 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5243 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5244 = torch.operator "onnx.Slice"(%5240, %5242, %5243, %5241) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %5245 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5246 = torch.operator "onnx.Concat"(%5244, %5245) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5247 = 
torch.operator "onnx.Reshape"(%5239, %5246) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5248 = torch.operator "onnx.Cast"(%5162) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %5249 = torch.operator "onnx.Mul"(%5248, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5250 = torch.operator "onnx.Cast"(%5247) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5251 = torch.operator "onnx.Mul"(%5250, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5252 = torch.operator "onnx.Add"(%5249, %5251) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5253 = torch.operator "onnx.Cast"(%5252) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %5254 = torch.operator "onnx.Shape"(%5208) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %5255 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5256 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5257 = torch.operator "onnx.Slice"(%5254, %5255, %5256) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5258 = torch.operator "onnx.Cast"(%5257) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %5259 = torch.operator "onnx.Sqrt"(%5258) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %5260 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.9_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %5261 = torch.operator "onnx.Cast"(%5259) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %5262 = torch.operator "onnx.Div"(%5260, %5261) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %5263 = torch.operator "onnx.Cast"(%5262) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %5264 = torch.operator "onnx.Transpose"(%5253) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %5265 = torch.operator "onnx.Sqrt"(%5263) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %5266 = torch.operator "onnx.Mul"(%5208, %5265) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %5267 = torch.operator "onnx.Sqrt"(%5263) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %5268 = torch.operator "onnx.Mul"(%5264, %5267) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %5269 = torch.operator "onnx.MatMul"(%5266, %5268) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %5270 = torch.operator "onnx.Softmax"(%5269) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %5271 = torch.operator "onnx.MatMul"(%5270, %5163) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %5272 = torch.operator "onnx.Transpose"(%5271) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %5273 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_46_attr__value> : tensor} : () 
-> !torch.vtensor<[],si64> %5274 = torch.operator "onnx.Mul"(%5048, %5273) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5275 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9817_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5276 = torch.operator "onnx.Unsqueeze"(%5035, %5275) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5277 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5278 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_9820_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5279 = torch.operator "onnx.Unsqueeze"(%5274, %5278) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5280 = torch.operator "onnx.Concat"(%5276, %5277, %5279) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %5281 = torch.operator "onnx.Reshape"(%5272, %5280) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %5282 = torch.operator "onnx.Cast"(%5281) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %5283 = torch.operator "onnx.Shape"(%5032) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %5284 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5285 = torch.operator "onnx.Gather"(%5283, %5284) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5286 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_49_attr__value> 
: tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5287 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5288 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5289 = torch.operator "onnx.Unsqueeze"(%5285, %5288) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5290 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5291 = torch.operator "onnx.Slice"(%5282, %5287, %5289, %5286, %5290) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %5292 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5293 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5294 = torch.operator "onnx.Unsqueeze"(%5285, %5293) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5295 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5296 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5297 = torch.operator "onnx.Slice"(%5282, %5294, %5295, %5292, %5296) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %5298 = torch.operator "onnx.MatMul"(%5297, %855) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5299 = torch.operator "onnx.Add"(%206, %5298) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5300 = torch.operator "onnx.MatMul"(%5291, %856) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5301 = torch.operator "onnx.Add"(%207, %5300) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5302 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5303 = torch.operator "onnx.Unsqueeze"(%4974, %5302) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5304 = torch.operator "onnx.Mul"(%5303, %5299) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5305 = torch.operator "onnx.Add"(%4920, %5304) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.92Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.9/norm2/Constant_attr__value" : tensor<3072xbf16> %5306 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.92Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.92Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.9/norm2/Constant_1_attr__value" : tensor<3072xbf16> %5307 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.92Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %5308 = torch.operator "onnx.LayerNormalization"(%5305, %5306, %5307) {torch.onnx.axis = -1 : si64, 
torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5309 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5310 = torch.operator "onnx.Unsqueeze"(%4980, %5309) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5311 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5312 = torch.operator "onnx.Add"(%5310, %5311) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %5313 = torch.operator "onnx.Mul"(%5308, %5312) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5314 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5315 = torch.operator "onnx.Unsqueeze"(%4977, %5314) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5316 = torch.operator "onnx.Add"(%5313, %5315) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5317 = torch.operator "onnx.MatMul"(%5316, %857) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5318 = torch.operator "onnx.Add"(%210, %5317) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5319 = torch.operator "onnx.Mul"(%5318, %5318) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5320 = torch.operator "onnx.Mul"(%5318, %5319) : (!torch.vtensor<[?,4096,12288],bf16>, 
!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_ff_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5322 = torch.operator "onnx.Mul"(%5321, %5320) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5323 = torch.operator "onnx.Add"(%5318, %5322) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5324 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5325 = torch.operator "onnx.Mul"(%5324, %5323) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5326 = torch.operator "onnx.Tanh"(%5325) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5327 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5328 = torch.operator "onnx.Add"(%5327, %5326) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5329 = torch.operator "onnx.Mul"(%5318, %5328) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5330 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5331 = torch.operator "onnx.Mul"(%5330, %5329) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5332 = torch.operator "onnx.MatMul"(%5331, %858) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5333 = 
torch.operator "onnx.Add"(%211, %5332) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5334 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5335 = torch.operator "onnx.Unsqueeze"(%4983, %5334) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5336 = torch.operator "onnx.Mul"(%5335, %5333) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5337 = torch.operator "onnx.Add"(%5305, %5336) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5338 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5339 = torch.operator "onnx.Unsqueeze"(%5012, %5338) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5340 = torch.operator "onnx.Mul"(%5339, %5301) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5341 = torch.operator "onnx.Add"(%4956, %5340) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.92Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.9/norm2_context/Constant_attr__value" : tensor<3072xbf16> %5342 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.92Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.92Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.9/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %5343 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.92Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> 
!torch.vtensor<[3072],bf16> %5344 = torch.operator "onnx.LayerNormalization"(%5341, %5342, %5343) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5345 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5346 = torch.operator "onnx.Unsqueeze"(%5018, %5345) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5347 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5348 = torch.operator "onnx.Add"(%5346, %5347) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %5349 = torch.operator "onnx.Mul"(%5344, %5348) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5350 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5351 = torch.operator "onnx.Unsqueeze"(%5015, %5350) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5352 = torch.operator "onnx.Add"(%5349, %5351) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5353 = torch.operator "onnx.MatMul"(%5352, %859) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5354 = torch.operator "onnx.Add"(%212, %5353) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5355 = torch.operator "onnx.Mul"(%5354, %5354) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> 
!torch.vtensor<[?,512,12288],bf16> %5356 = torch.operator "onnx.Mul"(%5354, %5355) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5357 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5358 = torch.operator "onnx.Mul"(%5357, %5356) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5359 = torch.operator "onnx.Add"(%5354, %5358) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5360 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5361 = torch.operator "onnx.Mul"(%5360, %5359) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5362 = torch.operator "onnx.Tanh"(%5361) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5363 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5364 = torch.operator "onnx.Add"(%5363, %5362) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5365 = torch.operator "onnx.Mul"(%5354, %5364) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5366 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5367 = torch.operator "onnx.Mul"(%5366, %5365) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5368 = torch.operator 
"onnx.MatMul"(%5367, %860) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5369 = torch.operator "onnx.Add"(%213, %5368) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5370 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.9_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5371 = torch.operator "onnx.Unsqueeze"(%5021, %5370) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5372 = torch.operator "onnx.Mul"(%5371, %5369) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5373 = torch.operator "onnx.Add"(%5341, %5372) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5374 = torch.operator "onnx.Gemm"(%1285, %214, %215) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %5375 = torch.operator "onnx.Shape"(%5374) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %5376 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5377 = torch.operator "onnx.Gather"(%5375, %5376) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5378 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5379 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %5380 = torch.operator "onnx.Add"(%5377, %5379) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5381 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5382 = torch.operator "onnx.Div"(%5380, %5381) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5383 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5384 = torch.operator "onnx.Mul"(%5382, %5383) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5385 = torch.operator "onnx.Slice"(%5374, %5378, %5384, %5376) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5386 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5387 = torch.operator "onnx.Mul"(%5382, %5386) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5388 = torch.operator "onnx.Slice"(%5374, %5384, %5387, %5376) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5389 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5390 = torch.operator "onnx.Mul"(%5382, %5389) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5391 = torch.operator "onnx.Slice"(%5374, %5387, %5390, %5376) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?],bf16> %5392 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5393 = torch.operator "onnx.Mul"(%5382, %5392) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5394 = torch.operator "onnx.Slice"(%5374, %5390, %5393, %5376) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5395 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5396 = torch.operator "onnx.Mul"(%5382, %5395) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5397 = torch.operator "onnx.Slice"(%5374, %5393, %5396, %5376) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5398 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5399 = torch.operator "onnx.Mul"(%5382, %5398) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5400 = torch.operator "onnx.Slice"(%5374, %5396, %5399, %5376) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.102Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.10/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %5401 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.102Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.102Fnorm12Fnorm2FConstant_1_attr__value = util.global.load 
@"/transformer_blocks.10/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %5402 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.102Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %5403 = torch.operator "onnx.LayerNormalization"(%5337, %5401, %5402) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5404 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5405 = torch.operator "onnx.Unsqueeze"(%5388, %5404) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5406 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5407 = torch.operator "onnx.Add"(%5405, %5406) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %5408 = torch.operator "onnx.Mul"(%5403, %5407) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5410 = torch.operator "onnx.Unsqueeze"(%5385, %5409) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5411 = torch.operator "onnx.Add"(%5408, %5410) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5412 = torch.operator "onnx.Gemm"(%1285, %216, %217) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, 
!torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %5413 = torch.operator "onnx.Shape"(%5412) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %5414 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5415 = torch.operator "onnx.Gather"(%5413, %5414) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5416 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5417 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5418 = torch.operator "onnx.Add"(%5415, %5417) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5420 = torch.operator "onnx.Div"(%5418, %5419) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5421 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5422 = torch.operator "onnx.Mul"(%5420, %5421) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5423 = torch.operator "onnx.Slice"(%5412, %5416, %5422, %5414) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5424 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.10_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5425 = torch.operator "onnx.Mul"(%5420, %5424) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5426 = torch.operator "onnx.Slice"(%5412, %5422, %5425, %5414) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5427 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5428 = torch.operator "onnx.Mul"(%5420, %5427) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5429 = torch.operator "onnx.Slice"(%5412, %5425, %5428, %5414) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5430 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5431 = torch.operator "onnx.Mul"(%5420, %5430) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5432 = torch.operator "onnx.Slice"(%5412, %5428, %5431, %5414) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5433 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5434 = torch.operator "onnx.Mul"(%5420, %5433) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5435 = torch.operator "onnx.Slice"(%5412, %5431, %5434, %5414) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5436 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5437 = torch.operator "onnx.Mul"(%5420, %5436) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5438 = torch.operator "onnx.Slice"(%5412, %5434, %5437, %5414) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.102Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.10/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %5439 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.102Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.102Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.10/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %5440 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.102Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %5441 = torch.operator "onnx.LayerNormalization"(%5373, %5439, %5440) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5442 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5443 = torch.operator "onnx.Unsqueeze"(%5426, %5442) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5444 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_11_attr__value> : 
tensor} : () -> !torch.vtensor<[],bf16> %5445 = torch.operator "onnx.Add"(%5443, %5444) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %5446 = torch.operator "onnx.Mul"(%5441, %5445) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5447 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5448 = torch.operator "onnx.Unsqueeze"(%5423, %5447) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5449 = torch.operator "onnx.Add"(%5446, %5448) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5450 = torch.operator "onnx.Shape"(%5449) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %5451 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5452 = torch.operator "onnx.Gather"(%5450, %5451) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5453 = torch.operator "onnx.MatMul"(%5411, %861) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5454 = torch.operator "onnx.Add"(%220, %5453) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5455 = torch.operator "onnx.MatMul"(%5411, %862) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5456 = torch.operator "onnx.Add"(%221, %5455) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5457 = torch.operator "onnx.MatMul"(%5411, %863) : (!torch.vtensor<[?,4096,3072],bf16>, 
!torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5458 = torch.operator "onnx.Add"(%222, %5457) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5459 = torch.operator "onnx.Shape"(%5456) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64> %5460 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5461 = torch.operator "onnx.Gather"(%5459, %5460) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5462 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5463 = torch.operator "onnx.Div"(%5461, %5462) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5464 = torch.operator "onnx.Cast"(%5463) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5465 = torch.operator "onnx.Cast"(%5464) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5466 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10008_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5467 = torch.operator "onnx.Unsqueeze"(%5452, %5466) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5469 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5470 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10012_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %5471 = torch.operator "onnx.Unsqueeze"(%5465, %5470) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5472 = torch.operator "onnx.Concat"(%5467, %5468, %5469, %5471) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5473 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10015_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5474 = torch.operator "onnx.Unsqueeze"(%5452, %5473) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5475 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5476 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5477 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10019_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5478 = torch.operator "onnx.Unsqueeze"(%5465, %5477) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5479 = torch.operator "onnx.Concat"(%5474, %5475, %5476, %5478) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5480 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10022_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5481 = torch.operator "onnx.Unsqueeze"(%5452, %5480) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5482 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_7_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5483 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5484 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10026_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5485 = torch.operator "onnx.Unsqueeze"(%5465, %5484) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5486 = torch.operator "onnx.Concat"(%5481, %5482, %5483, %5485) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10029_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5488 = torch.operator "onnx.Unsqueeze"(%5452, %5487) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5489 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5490 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5491 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10033_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5492 = torch.operator "onnx.Unsqueeze"(%5465, %5491) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5493 = torch.operator "onnx.Concat"(%5488, %5489, %5490, %5492) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5494 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_10036_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5495 = torch.operator "onnx.Unsqueeze"(%5452, %5494) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5496 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5497 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5498 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10040_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5499 = torch.operator "onnx.Unsqueeze"(%5465, %5498) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5500 = torch.operator "onnx.Concat"(%5495, %5496, %5497, %5499) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5501 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10043_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5502 = torch.operator "onnx.Unsqueeze"(%5452, %5501) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5503 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5504 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5505 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10047_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5506 = torch.operator "onnx.Unsqueeze"(%5465, %5505) : 
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5507 = torch.operator "onnx.Concat"(%5502, %5503, %5504, %5506) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5508 = torch.operator "onnx.Reshape"(%5454, %5472) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5509 = torch.operator "onnx.Transpose"(%5508) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5510 = torch.operator "onnx.Reshape"(%5456, %5479) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5511 = torch.operator "onnx.Transpose"(%5510) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5512 = torch.operator "onnx.Reshape"(%5458, %5486) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5513 = torch.operator "onnx.Transpose"(%5512) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5514 = torch.operator "onnx.Cast"(%5509) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5515 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5516 = torch.operator "onnx.Pow"(%5514, %5515) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5517 = torch.operator "onnx.ReduceMean"(%5516) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> 
!torch.vtensor<[?,?,?,1],f32> %5518 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5519 = torch.operator "onnx.Add"(%5517, %5518) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5520 = torch.operator "onnx.Sqrt"(%5519) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5521 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5522 = torch.operator "onnx.Div"(%5521, %5520) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5523 = torch.operator "onnx.Cast"(%5509) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5524 = torch.operator "onnx.Mul"(%5523, %5522) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5525 = torch.operator "onnx.Cast"(%5524) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %5526 = torch.operator "onnx.Mul"(%5525, %218) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5527 = torch.operator "onnx.Cast"(%5511) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5528 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5529 = torch.operator "onnx.Pow"(%5527, %5528) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5530 = torch.operator "onnx.ReduceMean"(%5529) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5531 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5532 = torch.operator "onnx.Add"(%5530, %5531) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5533 = torch.operator "onnx.Sqrt"(%5532) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5534 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5535 = torch.operator "onnx.Div"(%5534, %5533) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5536 = torch.operator "onnx.Cast"(%5511) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5537 = torch.operator "onnx.Mul"(%5536, %5535) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5538 = torch.operator "onnx.Cast"(%5537) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %5539 = torch.operator "onnx.Mul"(%5538, %219) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5540 = torch.operator "onnx.MatMul"(%5449, %864) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5541 = torch.operator "onnx.Add"(%225, %5540) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5542 = torch.operator "onnx.MatMul"(%5449, %865) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5543 = torch.operator "onnx.Add"(%223, %5542) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5544 = torch.operator "onnx.MatMul"(%5449, %866) : (!torch.vtensor<[?,512,3072],bf16>, 
!torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5545 = torch.operator "onnx.Add"(%224, %5544) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5546 = torch.operator "onnx.Reshape"(%5541, %5493) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5547 = torch.operator "onnx.Transpose"(%5546) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5548 = torch.operator "onnx.Reshape"(%5543, %5500) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5549 = torch.operator "onnx.Transpose"(%5548) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5550 = torch.operator "onnx.Reshape"(%5545, %5507) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5551 = torch.operator "onnx.Transpose"(%5550) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5552 = torch.operator "onnx.Cast"(%5547) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5553 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_added_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5554 = torch.operator "onnx.Pow"(%5552, %5553) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5555 = torch.operator "onnx.ReduceMean"(%5554) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5556 = torch.operator "onnx.Constant"() {torch.onnx.value 
= dense_resource<__transformer_blocks.10_attn_norm_added_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5557 = torch.operator "onnx.Add"(%5555, %5556) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5558 = torch.operator "onnx.Sqrt"(%5557) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5559 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_added_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5560 = torch.operator "onnx.Div"(%5559, %5558) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5561 = torch.operator "onnx.Cast"(%5547) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5562 = torch.operator "onnx.Mul"(%5561, %5560) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5563 = torch.operator "onnx.Cast"(%5562) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %5564 = torch.operator "onnx.Mul"(%5563, %228) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5565 = torch.operator "onnx.Cast"(%5549) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5566 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_added_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5567 = torch.operator "onnx.Pow"(%5565, %5566) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5568 = torch.operator "onnx.ReduceMean"(%5567) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5569 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.10_attn_norm_added_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5570 = torch.operator "onnx.Add"(%5568, %5569) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5571 = torch.operator "onnx.Sqrt"(%5570) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5572 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_norm_added_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5573 = torch.operator "onnx.Div"(%5572, %5571) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5574 = torch.operator "onnx.Cast"(%5549) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5575 = torch.operator "onnx.Mul"(%5574, %5573) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5576 = torch.operator "onnx.Cast"(%5575) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %5577 = torch.operator "onnx.Mul"(%5576, %229) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5578 = torch.operator "onnx.Concat"(%5564, %5526) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5579 = torch.operator "onnx.Concat"(%5577, %5539) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5580 = torch.operator "onnx.Concat"(%5551, %5513) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5581 = torch.operator "onnx.Shape"(%5578) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5582 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.10_attn_Constant_15_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5583 = torch.operator "onnx.Gather"(%5581, %5582) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5584 = torch.operator "onnx.Shape"(%5578) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5585 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_16_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5586 = torch.operator "onnx.Gather"(%5584, %5585) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5587 = torch.operator "onnx.Shape"(%5578) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5588 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_17_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5589 = torch.operator "onnx.Gather"(%5587, %5588) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5590 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10132_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5591 = torch.operator "onnx.Unsqueeze"(%5583, %5590) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5592 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10134_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5593 = torch.operator "onnx.Unsqueeze"(%5586, %5592) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5594 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10136_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5595 = torch.operator "onnx.Unsqueeze"(%5589, %5594) : (!torch.vtensor<[],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5596 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5597 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5598 = torch.operator "onnx.Concat"(%5591, %5593, %5595, %5596, %5597) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %5599 = torch.operator "onnx.Reshape"(%5578, %5598) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %5600 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %5601:2 = torch.operator "onnx.Split"(%5599, %5600) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %5602 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5603 = torch.operator "onnx.Squeeze"(%5601#0, %5602) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5604 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5605 = torch.operator "onnx.Squeeze"(%5601#1, %5604) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5606 = torch.operator "onnx.Neg"(%5605) : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5607 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5608 = torch.operator "onnx.Unsqueeze"(%5606, %5607) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %5609 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5610 = torch.operator "onnx.Unsqueeze"(%5603, %5609) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %5611 = torch.operator "onnx.Concat"(%5608, %5610) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %5612 = torch.operator "onnx.Shape"(%5611) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %5613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5614 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5615 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5616 = torch.operator "onnx.Slice"(%5612, %5614, %5615, %5613) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %5617 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5618 = torch.operator "onnx.Concat"(%5616, 
%5617) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5619 = torch.operator "onnx.Reshape"(%5611, %5618) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5620 = torch.operator "onnx.Cast"(%5578) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %5621 = torch.operator "onnx.Mul"(%5620, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5622 = torch.operator "onnx.Cast"(%5619) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5623 = torch.operator "onnx.Mul"(%5622, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5624 = torch.operator "onnx.Add"(%5621, %5623) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5625 = torch.operator "onnx.Cast"(%5624) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %5626 = torch.operator "onnx.Shape"(%5579) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5627 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_29_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5628 = torch.operator "onnx.Gather"(%5626, %5627) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5629 = torch.operator "onnx.Shape"(%5579) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5630 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_30_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5631 = torch.operator "onnx.Gather"(%5629, %5630) {torch.onnx.axis = 0 : 
si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5632 = torch.operator "onnx.Shape"(%5579) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5633 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_31_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5634 = torch.operator "onnx.Gather"(%5632, %5633) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5635 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10177_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5636 = torch.operator "onnx.Unsqueeze"(%5628, %5635) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5637 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10179_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5638 = torch.operator "onnx.Unsqueeze"(%5631, %5637) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5639 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10181_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5640 = torch.operator "onnx.Unsqueeze"(%5634, %5639) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5641 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5642 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5643 = torch.operator "onnx.Concat"(%5636, %5638, %5640, %5641, %5642) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) 
-> !torch.vtensor<[5],si64> %5644 = torch.operator "onnx.Reshape"(%5579, %5643) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %5645 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %5646:2 = torch.operator "onnx.Split"(%5644, %5645) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %5647 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5648 = torch.operator "onnx.Squeeze"(%5646#0, %5647) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5649 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5650 = torch.operator "onnx.Squeeze"(%5646#1, %5649) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5651 = torch.operator "onnx.Neg"(%5650) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5652 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5653 = torch.operator "onnx.Unsqueeze"(%5651, %5652) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %5654 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5655 = torch.operator "onnx.Unsqueeze"(%5648, %5654) : (!torch.vtensor<[?,?,?,?],bf16>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %5656 = torch.operator "onnx.Concat"(%5653, %5655) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %5657 = torch.operator "onnx.Shape"(%5656) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %5658 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5659 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5660 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5661 = torch.operator "onnx.Slice"(%5657, %5659, %5660, %5658) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %5662 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5663 = torch.operator "onnx.Concat"(%5661, %5662) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5664 = torch.operator "onnx.Reshape"(%5656, %5663) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5665 = torch.operator "onnx.Cast"(%5579) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %5666 = torch.operator "onnx.Mul"(%5665, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5667 = torch.operator "onnx.Cast"(%5664) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5668 = torch.operator "onnx.Mul"(%5667, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5669 = torch.operator "onnx.Add"(%5666, %5668) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %5670 = torch.operator "onnx.Cast"(%5669) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %5671 = torch.operator "onnx.Shape"(%5625) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %5672 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5673 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5674 = torch.operator "onnx.Slice"(%5671, %5672, %5673) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5675 = torch.operator "onnx.Cast"(%5674) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %5676 = torch.operator "onnx.Sqrt"(%5675) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %5677 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %5678 = torch.operator "onnx.Cast"(%5676) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %5679 = torch.operator "onnx.Div"(%5677, %5678) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %5680 = torch.operator "onnx.Cast"(%5679) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %5681 = torch.operator 
"onnx.Transpose"(%5670) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %5682 = torch.operator "onnx.Sqrt"(%5680) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %5683 = torch.operator "onnx.Mul"(%5625, %5682) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %5684 = torch.operator "onnx.Sqrt"(%5680) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %5685 = torch.operator "onnx.Mul"(%5681, %5684) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %5686 = torch.operator "onnx.MatMul"(%5683, %5685) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %5687 = torch.operator "onnx.Softmax"(%5686) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %5688 = torch.operator "onnx.MatMul"(%5687, %5580) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %5689 = torch.operator "onnx.Transpose"(%5688) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %5690 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_46_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5691 = torch.operator "onnx.Mul"(%5465, %5690) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5692 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10234_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5693 = torch.operator "onnx.Unsqueeze"(%5452, %5692) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5694 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.10_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5695 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10237_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5696 = torch.operator "onnx.Unsqueeze"(%5691, %5695) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5697 = torch.operator "onnx.Concat"(%5693, %5694, %5696) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %5698 = torch.operator "onnx.Reshape"(%5689, %5697) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %5699 = torch.operator "onnx.Cast"(%5698) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %5700 = torch.operator "onnx.Shape"(%5449) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %5701 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5702 = torch.operator "onnx.Gather"(%5700, %5701) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5703 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5704 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5705 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5706 = torch.operator "onnx.Unsqueeze"(%5702, %5705) : (!torch.vtensor<[],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5707 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5708 = torch.operator "onnx.Slice"(%5699, %5704, %5706, %5703, %5707) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %5709 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5710 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5711 = torch.operator "onnx.Unsqueeze"(%5702, %5710) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5712 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5713 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5714 = torch.operator "onnx.Slice"(%5699, %5711, %5712, %5709, %5713) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %5715 = torch.operator "onnx.MatMul"(%5714, %867) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5716 = torch.operator "onnx.Add"(%226, %5715) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5717 = torch.operator "onnx.MatMul"(%5708, %868) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> 
!torch.vtensor<[?,?,3072],bf16> %5718 = torch.operator "onnx.Add"(%227, %5717) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5719 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5720 = torch.operator "onnx.Unsqueeze"(%5391, %5719) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5721 = torch.operator "onnx.Mul"(%5720, %5716) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5722 = torch.operator "onnx.Add"(%5337, %5721) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.102Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.10/norm2/Constant_attr__value" : tensor<3072xbf16> %5723 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.102Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.102Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.10/norm2/Constant_1_attr__value" : tensor<3072xbf16> %5724 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.102Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %5725 = torch.operator "onnx.LayerNormalization"(%5722, %5723, %5724) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5726 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5727 = torch.operator "onnx.Unsqueeze"(%5397, %5726) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5728 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5729 = torch.operator "onnx.Add"(%5727, %5728) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %5730 = torch.operator "onnx.Mul"(%5725, %5729) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5731 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5732 = torch.operator "onnx.Unsqueeze"(%5394, %5731) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5733 = torch.operator "onnx.Add"(%5730, %5732) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5734 = torch.operator "onnx.MatMul"(%5733, %869) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5735 = torch.operator "onnx.Add"(%230, %5734) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5736 = torch.operator "onnx.Mul"(%5735, %5735) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5737 = torch.operator "onnx.Mul"(%5735, %5736) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5738 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_ff_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5739 = torch.operator "onnx.Mul"(%5738, %5737) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5740 = torch.operator "onnx.Add"(%5735, %5739) : (!torch.vtensor<[?,4096,12288],bf16>, 
!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5741 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5742 = torch.operator "onnx.Mul"(%5741, %5740) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5743 = torch.operator "onnx.Tanh"(%5742) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5744 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5745 = torch.operator "onnx.Add"(%5744, %5743) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5746 = torch.operator "onnx.Mul"(%5735, %5745) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5747 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5748 = torch.operator "onnx.Mul"(%5747, %5746) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %5749 = torch.operator "onnx.MatMul"(%5748, %870) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5750 = torch.operator "onnx.Add"(%231, %5749) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5751 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5752 = torch.operator "onnx.Unsqueeze"(%5400, %5751) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5753 = torch.operator 
"onnx.Mul"(%5752, %5750) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5754 = torch.operator "onnx.Add"(%5722, %5753) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5755 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5756 = torch.operator "onnx.Unsqueeze"(%5429, %5755) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5757 = torch.operator "onnx.Mul"(%5756, %5718) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %5758 = torch.operator "onnx.Add"(%5373, %5757) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.102Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.10/norm2_context/Constant_attr__value" : tensor<3072xbf16> %5759 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.102Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.102Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.10/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %5760 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.102Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %5761 = torch.operator "onnx.LayerNormalization"(%5758, %5759, %5760) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5762 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%5763 = torch.operator "onnx.Unsqueeze"(%5435, %5762) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5764 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5765 = torch.operator "onnx.Add"(%5763, %5764) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %5766 = torch.operator "onnx.Mul"(%5761, %5765) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5767 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5768 = torch.operator "onnx.Unsqueeze"(%5432, %5767) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5769 = torch.operator "onnx.Add"(%5766, %5768) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5770 = torch.operator "onnx.MatMul"(%5769, %871) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5771 = torch.operator "onnx.Add"(%232, %5770) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5772 = torch.operator "onnx.Mul"(%5771, %5771) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5773 = torch.operator "onnx.Mul"(%5771, %5772) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5774 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5775 = torch.operator "onnx.Mul"(%5774, %5773) : (!torch.vtensor<[],bf16>, 
!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5776 = torch.operator "onnx.Add"(%5771, %5775) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5777 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5778 = torch.operator "onnx.Mul"(%5777, %5776) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5779 = torch.operator "onnx.Tanh"(%5778) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5780 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5781 = torch.operator "onnx.Add"(%5780, %5779) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5782 = torch.operator "onnx.Mul"(%5771, %5781) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5784 = torch.operator "onnx.Mul"(%5783, %5782) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %5785 = torch.operator "onnx.MatMul"(%5784, %872) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5786 = torch.operator "onnx.Add"(%233, %5785) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5787 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.10_Constant_9_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %5788 = torch.operator "onnx.Unsqueeze"(%5438, %5787) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5789 = torch.operator "onnx.Mul"(%5788, %5786) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5790 = torch.operator "onnx.Add"(%5758, %5789) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5791 = torch.operator "onnx.Gemm"(%1285, %234, %235) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %5792 = torch.operator "onnx.Shape"(%5791) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %5793 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5794 = torch.operator "onnx.Gather"(%5792, %5793) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5795 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5796 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5797 = torch.operator "onnx.Add"(%5794, %5796) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5798 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5799 = torch.operator "onnx.Div"(%5797, %5798) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) 
-> !torch.vtensor<[1],si64> %5800 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5801 = torch.operator "onnx.Mul"(%5799, %5800) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5802 = torch.operator "onnx.Slice"(%5791, %5795, %5801, %5793) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5803 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5804 = torch.operator "onnx.Mul"(%5799, %5803) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5805 = torch.operator "onnx.Slice"(%5791, %5801, %5804, %5793) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5806 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5807 = torch.operator "onnx.Mul"(%5799, %5806) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5808 = torch.operator "onnx.Slice"(%5791, %5804, %5807, %5793) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5809 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5810 = torch.operator "onnx.Mul"(%5799, %5809) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5811 = torch.operator "onnx.Slice"(%5791, %5807, %5810, %5793) : (!torch.vtensor<[1,18432],bf16>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5812 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5813 = torch.operator "onnx.Mul"(%5799, %5812) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5814 = torch.operator "onnx.Slice"(%5791, %5810, %5813, %5793) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5815 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5816 = torch.operator "onnx.Mul"(%5799, %5815) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5817 = torch.operator "onnx.Slice"(%5791, %5813, %5816, %5793) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.112Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.11/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %5818 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.112Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.112Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.11/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %5819 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.112Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %5820 = torch.operator "onnx.LayerNormalization"(%5754, %5818, %5819) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, 
!torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5821 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5822 = torch.operator "onnx.Unsqueeze"(%5805, %5821) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5823 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5824 = torch.operator "onnx.Add"(%5822, %5823) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %5825 = torch.operator "onnx.Mul"(%5820, %5824) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5826 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5827 = torch.operator "onnx.Unsqueeze"(%5802, %5826) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5828 = torch.operator "onnx.Add"(%5825, %5827) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5829 = torch.operator "onnx.Gemm"(%1285, %236, %237) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %5830 = torch.operator "onnx.Shape"(%5829) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %5831 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5832 = torch.operator "onnx.Gather"(%5830, %5831) {torch.onnx.axis = 0 : 
si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5833 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5834 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5835 = torch.operator "onnx.Add"(%5832, %5834) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5836 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5837 = torch.operator "onnx.Div"(%5835, %5836) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5838 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5839 = torch.operator "onnx.Mul"(%5837, %5838) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5840 = torch.operator "onnx.Slice"(%5829, %5833, %5839, %5831) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5841 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5842 = torch.operator "onnx.Mul"(%5837, %5841) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5843 = torch.operator "onnx.Slice"(%5829, %5839, %5842, %5831) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5844 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5845 = torch.operator "onnx.Mul"(%5837, %5844) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5846 = torch.operator "onnx.Slice"(%5829, %5842, %5845, %5831) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5848 = torch.operator "onnx.Mul"(%5837, %5847) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5849 = torch.operator "onnx.Slice"(%5829, %5845, %5848, %5831) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5850 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5851 = torch.operator "onnx.Mul"(%5837, %5850) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5852 = torch.operator "onnx.Slice"(%5829, %5848, %5851, %5831) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %5853 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5854 = torch.operator "onnx.Mul"(%5837, %5853) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5855 = torch.operator "onnx.Slice"(%5829, %5851, %5854, %5831) : (!torch.vtensor<[1,18432],bf16>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.112Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.11/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %5856 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.112Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.112Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.11/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %5857 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.112Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %5858 = torch.operator "onnx.LayerNormalization"(%5790, %5856, %5857) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5859 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5860 = torch.operator "onnx.Unsqueeze"(%5843, %5859) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5861 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %5862 = torch.operator "onnx.Add"(%5860, %5861) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %5863 = torch.operator "onnx.Mul"(%5858, %5862) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5864 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_norm1_context_Constant_12_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5865 = torch.operator "onnx.Unsqueeze"(%5840, %5864) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %5866 = torch.operator "onnx.Add"(%5863, %5865) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5867 = torch.operator "onnx.Shape"(%5866) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %5868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5869 = torch.operator "onnx.Gather"(%5867, %5868) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5870 = torch.operator "onnx.MatMul"(%5828, %873) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5871 = torch.operator "onnx.Add"(%240, %5870) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5872 = torch.operator "onnx.MatMul"(%5828, %874) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5873 = torch.operator "onnx.Add"(%241, %5872) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5874 = torch.operator "onnx.MatMul"(%5828, %875) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5875 = torch.operator "onnx.Add"(%242, %5874) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %5876 = torch.operator "onnx.Shape"(%5873) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64> %5877 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_1_attr__value> : tensor} : () -> 
!torch.vtensor<[],si64> %5878 = torch.operator "onnx.Gather"(%5876, %5877) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5879 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %5880 = torch.operator "onnx.Div"(%5878, %5879) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5881 = torch.operator "onnx.Cast"(%5880) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5882 = torch.operator "onnx.Cast"(%5881) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %5883 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10425_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5884 = torch.operator "onnx.Unsqueeze"(%5869, %5883) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5885 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5886 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5887 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10429_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5888 = torch.operator "onnx.Unsqueeze"(%5882, %5887) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5889 = torch.operator "onnx.Concat"(%5884, %5885, %5886, %5888) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5890 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_10432_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5891 = torch.operator "onnx.Unsqueeze"(%5869, %5890) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5892 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5893 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5894 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10436_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5895 = torch.operator "onnx.Unsqueeze"(%5882, %5894) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5896 = torch.operator "onnx.Concat"(%5891, %5892, %5893, %5895) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5897 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10439_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5898 = torch.operator "onnx.Unsqueeze"(%5869, %5897) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5899 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5900 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5901 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10443_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5902 = torch.operator "onnx.Unsqueeze"(%5882, %5901) : 
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5903 = torch.operator "onnx.Concat"(%5898, %5899, %5900, %5902) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5904 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10446_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5905 = torch.operator "onnx.Unsqueeze"(%5869, %5904) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5906 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5907 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5908 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10450_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5909 = torch.operator "onnx.Unsqueeze"(%5882, %5908) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5910 = torch.operator "onnx.Concat"(%5905, %5906, %5907, %5909) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5911 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10453_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5912 = torch.operator "onnx.Unsqueeze"(%5869, %5911) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5913 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5914 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5915 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10457_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5916 = torch.operator "onnx.Unsqueeze"(%5882, %5915) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5917 = torch.operator "onnx.Concat"(%5912, %5913, %5914, %5916) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5918 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10460_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5919 = torch.operator "onnx.Unsqueeze"(%5869, %5918) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5920 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5921 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5922 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10464_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %5923 = torch.operator "onnx.Unsqueeze"(%5882, %5922) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %5924 = torch.operator "onnx.Concat"(%5919, %5920, %5921, %5923) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %5925 = torch.operator "onnx.Reshape"(%5871, %5889) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> 
!torch.vtensor<[?,?,?,?],bf16> %5926 = torch.operator "onnx.Transpose"(%5925) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5927 = torch.operator "onnx.Reshape"(%5873, %5896) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5928 = torch.operator "onnx.Transpose"(%5927) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5929 = torch.operator "onnx.Reshape"(%5875, %5903) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5930 = torch.operator "onnx.Transpose"(%5929) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5931 = torch.operator "onnx.Cast"(%5926) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5932 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5933 = torch.operator "onnx.Pow"(%5931, %5932) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5934 = torch.operator "onnx.ReduceMean"(%5933) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5935 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5936 = torch.operator "onnx.Add"(%5934, %5935) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5937 = torch.operator "onnx.Sqrt"(%5936) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5938 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5939 = torch.operator "onnx.Div"(%5938, %5937) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5940 = torch.operator "onnx.Cast"(%5926) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5941 = torch.operator "onnx.Mul"(%5940, %5939) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5942 = torch.operator "onnx.Cast"(%5941) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %5943 = torch.operator "onnx.Mul"(%5942, %238) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5944 = torch.operator "onnx.Cast"(%5928) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5945 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5946 = torch.operator "onnx.Pow"(%5944, %5945) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5947 = torch.operator "onnx.ReduceMean"(%5946) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5948 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5949 = torch.operator "onnx.Add"(%5947, %5948) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5950 = torch.operator "onnx.Sqrt"(%5949) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5951 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.11_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5952 = torch.operator "onnx.Div"(%5951, %5950) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5953 = torch.operator "onnx.Cast"(%5928) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5954 = torch.operator "onnx.Mul"(%5953, %5952) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5955 = torch.operator "onnx.Cast"(%5954) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %5956 = torch.operator "onnx.Mul"(%5955, %239) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5957 = torch.operator "onnx.MatMul"(%5866, %876) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5958 = torch.operator "onnx.Add"(%245, %5957) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5959 = torch.operator "onnx.MatMul"(%5866, %877) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5960 = torch.operator "onnx.Add"(%243, %5959) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5961 = torch.operator "onnx.MatMul"(%5866, %878) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5962 = torch.operator "onnx.Add"(%244, %5961) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %5963 = torch.operator "onnx.Reshape"(%5958, %5910) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5964 = torch.operator "onnx.Transpose"(%5963) {torch.onnx.perm 
= [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5965 = torch.operator "onnx.Reshape"(%5960, %5917) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5966 = torch.operator "onnx.Transpose"(%5965) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5967 = torch.operator "onnx.Reshape"(%5962, %5924) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %5968 = torch.operator "onnx.Transpose"(%5967) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5969 = torch.operator "onnx.Cast"(%5964) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5970 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_added_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5971 = torch.operator "onnx.Pow"(%5969, %5970) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5972 = torch.operator "onnx.ReduceMean"(%5971) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5973 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_added_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5974 = torch.operator "onnx.Add"(%5972, %5973) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5975 = torch.operator "onnx.Sqrt"(%5974) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5976 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.11_attn_norm_added_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5977 = torch.operator "onnx.Div"(%5976, %5975) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5978 = torch.operator "onnx.Cast"(%5964) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5979 = torch.operator "onnx.Mul"(%5978, %5977) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5980 = torch.operator "onnx.Cast"(%5979) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %5981 = torch.operator "onnx.Mul"(%5980, %248) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5982 = torch.operator "onnx.Cast"(%5966) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5983 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_added_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5984 = torch.operator "onnx.Pow"(%5982, %5983) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5985 = torch.operator "onnx.ReduceMean"(%5984) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5986 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_norm_added_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5987 = torch.operator "onnx.Add"(%5985, %5986) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5988 = torch.operator "onnx.Sqrt"(%5987) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5989 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.11_attn_norm_added_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %5990 = torch.operator "onnx.Div"(%5989, %5988) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %5991 = torch.operator "onnx.Cast"(%5966) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %5992 = torch.operator "onnx.Mul"(%5991, %5990) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %5993 = torch.operator "onnx.Cast"(%5992) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %5994 = torch.operator "onnx.Mul"(%5993, %249) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5995 = torch.operator "onnx.Concat"(%5981, %5943) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5996 = torch.operator "onnx.Concat"(%5994, %5956) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %5997 = torch.operator "onnx.Concat"(%5968, %5930) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %5998 = torch.operator "onnx.Shape"(%5995) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %5999 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_15_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6000 = torch.operator "onnx.Gather"(%5998, %5999) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6001 = torch.operator "onnx.Shape"(%5995) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6002 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.11_attn_Constant_16_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6003 = torch.operator "onnx.Gather"(%6001, %6002) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6004 = torch.operator "onnx.Shape"(%5995) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6005 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_17_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6006 = torch.operator "onnx.Gather"(%6004, %6005) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6007 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10549_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6008 = torch.operator "onnx.Unsqueeze"(%6000, %6007) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6009 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10551_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6010 = torch.operator "onnx.Unsqueeze"(%6003, %6009) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6011 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10553_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6012 = torch.operator "onnx.Unsqueeze"(%6006, %6011) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6013 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6014 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6015 = torch.operator "onnx.Concat"(%6008, 
%6010, %6012, %6013, %6014) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %6016 = torch.operator "onnx.Reshape"(%5995, %6015) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6017 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %6018:2 = torch.operator "onnx.Split"(%6016, %6017) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %6019 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6020 = torch.operator "onnx.Squeeze"(%6018#0, %6019) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6021 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6022 = torch.operator "onnx.Squeeze"(%6018#1, %6021) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6023 = torch.operator "onnx.Neg"(%6022) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6024 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6025 = torch.operator "onnx.Unsqueeze"(%6023, %6024) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6026 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.11_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6027 = torch.operator "onnx.Unsqueeze"(%6020, %6026) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6028 = torch.operator "onnx.Concat"(%6025, %6027) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6029 = torch.operator "onnx.Shape"(%6028) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %6030 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6031 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6032 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6033 = torch.operator "onnx.Slice"(%6029, %6031, %6032, %6030) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %6034 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6035 = torch.operator "onnx.Concat"(%6033, %6034) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6036 = torch.operator "onnx.Reshape"(%6028, %6035) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6037 = torch.operator "onnx.Cast"(%5995) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %6038 = torch.operator 
"onnx.Mul"(%6037, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6039 = torch.operator "onnx.Cast"(%6036) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6040 = torch.operator "onnx.Mul"(%6039, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6041 = torch.operator "onnx.Add"(%6038, %6040) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6042 = torch.operator "onnx.Cast"(%6041) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %6043 = torch.operator "onnx.Shape"(%5996) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6044 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_29_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6045 = torch.operator "onnx.Gather"(%6043, %6044) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6046 = torch.operator "onnx.Shape"(%5996) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6047 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_30_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6048 = torch.operator "onnx.Gather"(%6046, %6047) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6049 = torch.operator "onnx.Shape"(%5996) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6050 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_31_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6051 = torch.operator "onnx.Gather"(%6049, %6050) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6052 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10594_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6053 = torch.operator "onnx.Unsqueeze"(%6045, %6052) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6054 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10596_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6055 = torch.operator "onnx.Unsqueeze"(%6048, %6054) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6056 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10598_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6057 = torch.operator "onnx.Unsqueeze"(%6051, %6056) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6058 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6059 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6060 = torch.operator "onnx.Concat"(%6053, %6055, %6057, %6058, %6059) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %6061 = torch.operator "onnx.Reshape"(%5996, %6060) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6062 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %6063:2 = torch.operator "onnx.Split"(%6061, %6062) 
{torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %6064 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6065 = torch.operator "onnx.Squeeze"(%6063#0, %6064) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6066 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6067 = torch.operator "onnx.Squeeze"(%6063#1, %6066) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6068 = torch.operator "onnx.Neg"(%6067) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6069 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6070 = torch.operator "onnx.Unsqueeze"(%6068, %6069) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6071 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6072 = torch.operator "onnx.Unsqueeze"(%6065, %6071) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6073 = torch.operator "onnx.Concat"(%6070, %6072) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6074 = torch.operator "onnx.Shape"(%6073) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %6075 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.11_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6076 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6077 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6078 = torch.operator "onnx.Slice"(%6074, %6076, %6077, %6075) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %6079 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6080 = torch.operator "onnx.Concat"(%6078, %6079) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6081 = torch.operator "onnx.Reshape"(%6073, %6080) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6082 = torch.operator "onnx.Cast"(%5996) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %6083 = torch.operator "onnx.Mul"(%6082, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6084 = torch.operator "onnx.Cast"(%6081) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6085 = torch.operator "onnx.Mul"(%6084, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6086 = torch.operator "onnx.Add"(%6083, %6085) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6087 = torch.operator 
"onnx.Cast"(%6086) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %6088 = torch.operator "onnx.Shape"(%6042) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %6089 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6090 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6091 = torch.operator "onnx.Slice"(%6088, %6089, %6090) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6092 = torch.operator "onnx.Cast"(%6091) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %6093 = torch.operator "onnx.Sqrt"(%6092) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %6094 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %6095 = torch.operator "onnx.Cast"(%6093) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %6096 = torch.operator "onnx.Div"(%6094, %6095) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %6097 = torch.operator "onnx.Cast"(%6096) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %6098 = torch.operator "onnx.Transpose"(%6087) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %6099 = torch.operator "onnx.Sqrt"(%6097) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %6100 = torch.operator "onnx.Mul"(%6042, %6099) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %6101 = 
torch.operator "onnx.Sqrt"(%6097) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %6102 = torch.operator "onnx.Mul"(%6098, %6101) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %6103 = torch.operator "onnx.MatMul"(%6100, %6102) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %6104 = torch.operator "onnx.Softmax"(%6103) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %6105 = torch.operator "onnx.MatMul"(%6104, %5997) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %6106 = torch.operator "onnx.Transpose"(%6105) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %6107 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_46_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6108 = torch.operator "onnx.Mul"(%5882, %6107) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6109 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10651_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6110 = torch.operator "onnx.Unsqueeze"(%5869, %6109) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6111 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6112 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10654_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6113 = torch.operator "onnx.Unsqueeze"(%6108, %6112) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6114 = torch.operator 
"onnx.Concat"(%6110, %6111, %6113) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %6115 = torch.operator "onnx.Reshape"(%6106, %6114) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %6116 = torch.operator "onnx.Cast"(%6115) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %6117 = torch.operator "onnx.Shape"(%5866) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %6118 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6119 = torch.operator "onnx.Gather"(%6117, %6118) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6120 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6121 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6122 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6123 = torch.operator "onnx.Unsqueeze"(%6119, %6122) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6124 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6125 = torch.operator "onnx.Slice"(%6116, %6121, %6123, %6120, %6124) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) 
-> !torch.vtensor<[?,?,?],bf16> %6126 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6127 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6128 = torch.operator "onnx.Unsqueeze"(%6119, %6127) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6129 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6131 = torch.operator "onnx.Slice"(%6116, %6128, %6129, %6126, %6130) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %6132 = torch.operator "onnx.MatMul"(%6131, %879) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6133 = torch.operator "onnx.Add"(%246, %6132) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6134 = torch.operator "onnx.MatMul"(%6125, %880) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6135 = torch.operator "onnx.Add"(%247, %6134) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6136 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6137 = torch.operator "onnx.Unsqueeze"(%5808, %6136) : (!torch.vtensor<[?,?],bf16>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6138 = torch.operator "onnx.Mul"(%6137, %6133) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6139 = torch.operator "onnx.Add"(%5754, %6138) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.112Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.11/norm2/Constant_attr__value" : tensor<3072xbf16> %6140 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.112Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.112Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.11/norm2/Constant_1_attr__value" : tensor<3072xbf16> %6141 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.112Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %6142 = torch.operator "onnx.LayerNormalization"(%6139, %6140, %6141) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6143 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6144 = torch.operator "onnx.Unsqueeze"(%5814, %6143) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6145 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6146 = torch.operator "onnx.Add"(%6144, %6145) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %6147 = torch.operator "onnx.Mul"(%6142, %6146) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> 
!torch.vtensor<[?,4096,3072],bf16> %6148 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6149 = torch.operator "onnx.Unsqueeze"(%5811, %6148) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6150 = torch.operator "onnx.Add"(%6147, %6149) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6151 = torch.operator "onnx.MatMul"(%6150, %881) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6152 = torch.operator "onnx.Add"(%250, %6151) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6153 = torch.operator "onnx.Mul"(%6152, %6152) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6154 = torch.operator "onnx.Mul"(%6152, %6153) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6155 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_ff_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6156 = torch.operator "onnx.Mul"(%6155, %6154) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6157 = torch.operator "onnx.Add"(%6152, %6156) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6158 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6159 = torch.operator "onnx.Mul"(%6158, %6157) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6160 = torch.operator 
"onnx.Tanh"(%6159) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6161 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6162 = torch.operator "onnx.Add"(%6161, %6160) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6163 = torch.operator "onnx.Mul"(%6152, %6162) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6164 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6165 = torch.operator "onnx.Mul"(%6164, %6163) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6166 = torch.operator "onnx.MatMul"(%6165, %882) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6167 = torch.operator "onnx.Add"(%251, %6166) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6168 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6169 = torch.operator "onnx.Unsqueeze"(%5817, %6168) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6170 = torch.operator "onnx.Mul"(%6169, %6167) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6171 = torch.operator "onnx.Add"(%6139, %6170) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6172 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_5_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6173 = torch.operator "onnx.Unsqueeze"(%5846, %6172) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6174 = torch.operator "onnx.Mul"(%6173, %6135) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6175 = torch.operator "onnx.Add"(%5790, %6174) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.112Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.11/norm2_context/Constant_attr__value" : tensor<3072xbf16> %6176 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.112Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.112Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.11/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %6177 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.112Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %6178 = torch.operator "onnx.LayerNormalization"(%6175, %6176, %6177) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6180 = torch.operator "onnx.Unsqueeze"(%5852, %6179) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6181 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6182 = torch.operator "onnx.Add"(%6180, %6181) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> 
!torch.vtensor<[?,1,?],bf16> %6183 = torch.operator "onnx.Mul"(%6178, %6182) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6184 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6185 = torch.operator "onnx.Unsqueeze"(%5849, %6184) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6186 = torch.operator "onnx.Add"(%6183, %6185) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6187 = torch.operator "onnx.MatMul"(%6186, %883) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6188 = torch.operator "onnx.Add"(%252, %6187) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6189 = torch.operator "onnx.Mul"(%6188, %6188) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6190 = torch.operator "onnx.Mul"(%6188, %6189) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6191 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6192 = torch.operator "onnx.Mul"(%6191, %6190) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6193 = torch.operator "onnx.Add"(%6188, %6192) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6194 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6195 = torch.operator 
"onnx.Mul"(%6194, %6193) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6196 = torch.operator "onnx.Tanh"(%6195) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6197 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6198 = torch.operator "onnx.Add"(%6197, %6196) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6199 = torch.operator "onnx.Mul"(%6188, %6198) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6200 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6201 = torch.operator "onnx.Mul"(%6200, %6199) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6202 = torch.operator "onnx.MatMul"(%6201, %884) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6203 = torch.operator "onnx.Add"(%253, %6202) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6204 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.11_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6205 = torch.operator "onnx.Unsqueeze"(%5855, %6204) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6206 = torch.operator "onnx.Mul"(%6205, %6203) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6207 = torch.operator "onnx.Add"(%6175, %6206) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> 
!torch.vtensor<[?,512,3072],bf16> %6208 = torch.operator "onnx.Gemm"(%1285, %254, %255) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %6209 = torch.operator "onnx.Shape"(%6208) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %6210 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6211 = torch.operator "onnx.Gather"(%6209, %6210) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6212 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6213 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6214 = torch.operator "onnx.Add"(%6211, %6213) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6215 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6216 = torch.operator "onnx.Div"(%6214, %6215) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6218 = torch.operator "onnx.Mul"(%6216, %6217) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6219 = torch.operator "onnx.Slice"(%6208, %6212, %6218, %6210) : (!torch.vtensor<[1,18432],bf16>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6220 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6221 = torch.operator "onnx.Mul"(%6216, %6220) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6222 = torch.operator "onnx.Slice"(%6208, %6218, %6221, %6210) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6223 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6224 = torch.operator "onnx.Mul"(%6216, %6223) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6225 = torch.operator "onnx.Slice"(%6208, %6221, %6224, %6210) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6226 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6227 = torch.operator "onnx.Mul"(%6216, %6226) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6228 = torch.operator "onnx.Slice"(%6208, %6224, %6227, %6210) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6230 = torch.operator "onnx.Mul"(%6216, %6229) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6231 = torch.operator 
"onnx.Slice"(%6208, %6227, %6230, %6210) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6233 = torch.operator "onnx.Mul"(%6216, %6232) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6234 = torch.operator "onnx.Slice"(%6208, %6230, %6233, %6210) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.122Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.12/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %6235 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.122Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.122Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.12/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %6236 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.122Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %6237 = torch.operator "onnx.LayerNormalization"(%6171, %6235, %6236) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6238 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6239 = torch.operator "onnx.Unsqueeze"(%6222, %6238) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6240 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.12_norm1_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6241 = torch.operator "onnx.Add"(%6239, %6240) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %6242 = torch.operator "onnx.Mul"(%6237, %6241) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6243 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6244 = torch.operator "onnx.Unsqueeze"(%6219, %6243) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6245 = torch.operator "onnx.Add"(%6242, %6244) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6246 = torch.operator "onnx.Gemm"(%1285, %256, %257) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %6247 = torch.operator "onnx.Shape"(%6246) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %6248 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6249 = torch.operator "onnx.Gather"(%6247, %6248) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6250 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6251 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %6252 = torch.operator "onnx.Add"(%6249, %6251) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6253 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6254 = torch.operator "onnx.Div"(%6252, %6253) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6255 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6256 = torch.operator "onnx.Mul"(%6254, %6255) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6257 = torch.operator "onnx.Slice"(%6246, %6250, %6256, %6248) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6258 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6259 = torch.operator "onnx.Mul"(%6254, %6258) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6260 = torch.operator "onnx.Slice"(%6246, %6256, %6259, %6248) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6261 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6262 = torch.operator "onnx.Mul"(%6254, %6261) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6263 = torch.operator "onnx.Slice"(%6246, %6259, %6262, %6248) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6265 = torch.operator "onnx.Mul"(%6254, %6264) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6266 = torch.operator "onnx.Slice"(%6246, %6262, %6265, %6248) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6267 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6268 = torch.operator "onnx.Mul"(%6254, %6267) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6269 = torch.operator "onnx.Slice"(%6246, %6265, %6268, %6248) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6270 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6271 = torch.operator "onnx.Mul"(%6254, %6270) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6272 = torch.operator "onnx.Slice"(%6246, %6268, %6271, %6248) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.122Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.12/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %6273 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.122Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> 
%_2Ftransformer_blocks.122Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.12/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %6274 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.122Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %6275 = torch.operator "onnx.LayerNormalization"(%6207, %6273, %6274) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6276 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6277 = torch.operator "onnx.Unsqueeze"(%6260, %6276) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6278 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6279 = torch.operator "onnx.Add"(%6277, %6278) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %6280 = torch.operator "onnx.Mul"(%6275, %6279) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6281 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6282 = torch.operator "onnx.Unsqueeze"(%6257, %6281) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6283 = torch.operator "onnx.Add"(%6280, %6282) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6284 = torch.operator "onnx.Shape"(%6283) : (!torch.vtensor<[?,512,3072],bf16>) -> 
!torch.vtensor<[3],si64> %6285 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6286 = torch.operator "onnx.Gather"(%6284, %6285) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6287 = torch.operator "onnx.MatMul"(%6245, %885) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6288 = torch.operator "onnx.Add"(%260, %6287) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6289 = torch.operator "onnx.MatMul"(%6245, %886) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6290 = torch.operator "onnx.Add"(%261, %6289) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6291 = torch.operator "onnx.MatMul"(%6245, %887) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6292 = torch.operator "onnx.Add"(%262, %6291) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6293 = torch.operator "onnx.Shape"(%6290) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64> %6294 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6295 = torch.operator "onnx.Gather"(%6293, %6294) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6296 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6297 = torch.operator "onnx.Div"(%6295, %6296) : (!torch.vtensor<[],si64>, 
!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6298 = torch.operator "onnx.Cast"(%6297) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6299 = torch.operator "onnx.Cast"(%6298) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6300 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10842_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6301 = torch.operator "onnx.Unsqueeze"(%6286, %6300) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6302 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6303 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6304 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10846_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6305 = torch.operator "onnx.Unsqueeze"(%6299, %6304) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6306 = torch.operator "onnx.Concat"(%6301, %6302, %6303, %6305) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6307 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10849_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6308 = torch.operator "onnx.Unsqueeze"(%6286, %6307) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6309 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6310 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6311 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10853_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6312 = torch.operator "onnx.Unsqueeze"(%6299, %6311) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6313 = torch.operator "onnx.Concat"(%6308, %6309, %6310, %6312) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6314 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10856_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6315 = torch.operator "onnx.Unsqueeze"(%6286, %6314) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6316 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6317 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6318 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10860_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6319 = torch.operator "onnx.Unsqueeze"(%6299, %6318) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6320 = torch.operator "onnx.Concat"(%6315, %6316, %6317, %6319) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10863_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6322 = 
torch.operator "onnx.Unsqueeze"(%6286, %6321) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6323 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6324 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6325 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10867_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6326 = torch.operator "onnx.Unsqueeze"(%6299, %6325) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6327 = torch.operator "onnx.Concat"(%6322, %6323, %6324, %6326) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6328 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10870_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6329 = torch.operator "onnx.Unsqueeze"(%6286, %6328) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6330 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6331 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6332 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10874_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6333 = torch.operator "onnx.Unsqueeze"(%6299, %6332) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6334 = torch.operator "onnx.Concat"(%6329, 
%6330, %6331, %6333) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6335 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10877_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6336 = torch.operator "onnx.Unsqueeze"(%6286, %6335) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6337 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6338 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6339 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10881_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6340 = torch.operator "onnx.Unsqueeze"(%6299, %6339) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6341 = torch.operator "onnx.Concat"(%6336, %6337, %6338, %6340) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6342 = torch.operator "onnx.Reshape"(%6288, %6306) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6343 = torch.operator "onnx.Transpose"(%6342) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6344 = torch.operator "onnx.Reshape"(%6290, %6313) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6345 = torch.operator "onnx.Transpose"(%6344) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : 
si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6346 = torch.operator "onnx.Reshape"(%6292, %6320) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6347 = torch.operator "onnx.Transpose"(%6346) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6348 = torch.operator "onnx.Cast"(%6343) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6349 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6350 = torch.operator "onnx.Pow"(%6348, %6349) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6351 = torch.operator "onnx.ReduceMean"(%6350) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6352 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6353 = torch.operator "onnx.Add"(%6351, %6352) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6354 = torch.operator "onnx.Sqrt"(%6353) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6355 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6356 = torch.operator "onnx.Div"(%6355, %6354) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6357 = torch.operator "onnx.Cast"(%6343) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6358 = torch.operator "onnx.Mul"(%6357, 
%6356) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6359 = torch.operator "onnx.Cast"(%6358) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %6360 = torch.operator "onnx.Mul"(%6359, %258) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %6361 = torch.operator "onnx.Cast"(%6345) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6362 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6363 = torch.operator "onnx.Pow"(%6361, %6362) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6364 = torch.operator "onnx.ReduceMean"(%6363) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6365 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6366 = torch.operator "onnx.Add"(%6364, %6365) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6367 = torch.operator "onnx.Sqrt"(%6366) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6368 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6369 = torch.operator "onnx.Div"(%6368, %6367) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6370 = torch.operator "onnx.Cast"(%6345) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6371 = torch.operator "onnx.Mul"(%6370, %6369) : (!torch.vtensor<[?,?,?,?],f32>, 
!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6372 = torch.operator "onnx.Cast"(%6371) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %6373 = torch.operator "onnx.Mul"(%6372, %259) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %6374 = torch.operator "onnx.MatMul"(%6283, %888) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6375 = torch.operator "onnx.Add"(%265, %6374) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6376 = torch.operator "onnx.MatMul"(%6283, %889) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6377 = torch.operator "onnx.Add"(%263, %6376) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6378 = torch.operator "onnx.MatMul"(%6283, %890) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6379 = torch.operator "onnx.Add"(%264, %6378) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6380 = torch.operator "onnx.Reshape"(%6375, %6327) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6381 = torch.operator "onnx.Transpose"(%6380) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6382 = torch.operator "onnx.Reshape"(%6377, %6334) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6383 = torch.operator "onnx.Transpose"(%6382) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> 
%6384 = torch.operator "onnx.Reshape"(%6379, %6341) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6385 = torch.operator "onnx.Transpose"(%6384) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6386 = torch.operator "onnx.Cast"(%6381) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6387 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_added_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6388 = torch.operator "onnx.Pow"(%6386, %6387) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6389 = torch.operator "onnx.ReduceMean"(%6388) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6390 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_added_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6391 = torch.operator "onnx.Add"(%6389, %6390) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6392 = torch.operator "onnx.Sqrt"(%6391) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6393 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_added_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6394 = torch.operator "onnx.Div"(%6393, %6392) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6395 = torch.operator "onnx.Cast"(%6381) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6396 = torch.operator "onnx.Mul"(%6395, %6394) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) 
-> !torch.vtensor<[?,?,?,?],f32> %6397 = torch.operator "onnx.Cast"(%6396) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %6398 = torch.operator "onnx.Mul"(%6397, %268) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %6399 = torch.operator "onnx.Cast"(%6383) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6400 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_added_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6401 = torch.operator "onnx.Pow"(%6399, %6400) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6402 = torch.operator "onnx.ReduceMean"(%6401) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6403 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_added_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6404 = torch.operator "onnx.Add"(%6402, %6403) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6405 = torch.operator "onnx.Sqrt"(%6404) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6406 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_norm_added_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6407 = torch.operator "onnx.Div"(%6406, %6405) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6408 = torch.operator "onnx.Cast"(%6383) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6409 = torch.operator "onnx.Mul"(%6408, %6407) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6410 = 
torch.operator "onnx.Cast"(%6409) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %6411 = torch.operator "onnx.Mul"(%6410, %269) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %6412 = torch.operator "onnx.Concat"(%6398, %6360) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %6413 = torch.operator "onnx.Concat"(%6411, %6373) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %6414 = torch.operator "onnx.Concat"(%6385, %6347) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6415 = torch.operator "onnx.Shape"(%6412) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6416 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_15_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6417 = torch.operator "onnx.Gather"(%6415, %6416) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6418 = torch.operator "onnx.Shape"(%6412) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_16_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6420 = torch.operator "onnx.Gather"(%6418, %6419) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6421 = torch.operator "onnx.Shape"(%6412) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6422 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_17_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6423 
= torch.operator "onnx.Gather"(%6421, %6422) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6424 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10966_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6425 = torch.operator "onnx.Unsqueeze"(%6417, %6424) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6426 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10968_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6427 = torch.operator "onnx.Unsqueeze"(%6420, %6426) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6428 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_10970_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6429 = torch.operator "onnx.Unsqueeze"(%6423, %6428) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6430 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6431 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6432 = torch.operator "onnx.Concat"(%6425, %6427, %6429, %6430, %6431) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %6433 = torch.operator "onnx.Reshape"(%6412, %6432) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6434 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> 
!torch.vtensor<[2],si64> %6435:2 = torch.operator "onnx.Split"(%6433, %6434) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %6436 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6437 = torch.operator "onnx.Squeeze"(%6435#0, %6436) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6438 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6439 = torch.operator "onnx.Squeeze"(%6435#1, %6438) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6440 = torch.operator "onnx.Neg"(%6439) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6441 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6442 = torch.operator "onnx.Unsqueeze"(%6440, %6441) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6443 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6444 = torch.operator "onnx.Unsqueeze"(%6437, %6443) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6445 = torch.operator "onnx.Concat"(%6442, %6444) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6446 = torch.operator "onnx.Shape"(%6445) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %6447 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6448 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6449 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6450 = torch.operator "onnx.Slice"(%6446, %6448, %6449, %6447) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %6451 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6452 = torch.operator "onnx.Concat"(%6450, %6451) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6453 = torch.operator "onnx.Reshape"(%6445, %6452) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6454 = torch.operator "onnx.Cast"(%6412) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %6455 = torch.operator "onnx.Mul"(%6454, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6456 = torch.operator "onnx.Cast"(%6453) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6457 = torch.operator "onnx.Mul"(%6456, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6458 = torch.operator "onnx.Add"(%6455, %6457) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> 
%6459 = torch.operator "onnx.Cast"(%6458) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %6460 = torch.operator "onnx.Shape"(%6413) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6461 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_29_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6462 = torch.operator "onnx.Gather"(%6460, %6461) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6463 = torch.operator "onnx.Shape"(%6413) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6464 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_30_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6465 = torch.operator "onnx.Gather"(%6463, %6464) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6466 = torch.operator "onnx.Shape"(%6413) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %6467 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_31_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6468 = torch.operator "onnx.Gather"(%6466, %6467) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6469 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11011_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6470 = torch.operator "onnx.Unsqueeze"(%6462, %6469) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6471 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11013_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6472 = torch.operator "onnx.Unsqueeze"(%6465, %6471) : 
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6473 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11015_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6474 = torch.operator "onnx.Unsqueeze"(%6468, %6473) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6475 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6476 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6477 = torch.operator "onnx.Concat"(%6470, %6472, %6474, %6475, %6476) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %6478 = torch.operator "onnx.Reshape"(%6413, %6477) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6479 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %6480:2 = torch.operator "onnx.Split"(%6478, %6479) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %6481 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6482 = torch.operator "onnx.Squeeze"(%6480#0, %6481) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6483 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.12_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6484 = torch.operator "onnx.Squeeze"(%6480#1, %6483) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6485 = torch.operator "onnx.Neg"(%6484) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6486 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6487 = torch.operator "onnx.Unsqueeze"(%6485, %6486) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6488 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6489 = torch.operator "onnx.Unsqueeze"(%6482, %6488) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6490 = torch.operator "onnx.Concat"(%6487, %6489) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6491 = torch.operator "onnx.Shape"(%6490) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %6492 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6493 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6494 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6495 = torch.operator "onnx.Slice"(%6491, %6493, %6494, %6492) : (!torch.vtensor<[5],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %6496 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6497 = torch.operator "onnx.Concat"(%6495, %6496) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6498 = torch.operator "onnx.Reshape"(%6490, %6497) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6499 = torch.operator "onnx.Cast"(%6413) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %6500 = torch.operator "onnx.Mul"(%6499, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6501 = torch.operator "onnx.Cast"(%6498) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6502 = torch.operator "onnx.Mul"(%6501, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6503 = torch.operator "onnx.Add"(%6500, %6502) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6504 = torch.operator "onnx.Cast"(%6503) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %6505 = torch.operator "onnx.Shape"(%6459) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %6506 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6507 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %6508 = torch.operator "onnx.Slice"(%6505, %6506, %6507) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6509 = torch.operator "onnx.Cast"(%6508) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %6510 = torch.operator "onnx.Sqrt"(%6509) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %6511 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %6512 = torch.operator "onnx.Cast"(%6510) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %6513 = torch.operator "onnx.Div"(%6511, %6512) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %6514 = torch.operator "onnx.Cast"(%6513) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %6515 = torch.operator "onnx.Transpose"(%6504) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %6516 = torch.operator "onnx.Sqrt"(%6514) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %6517 = torch.operator "onnx.Mul"(%6459, %6516) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %6518 = torch.operator "onnx.Sqrt"(%6514) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %6519 = torch.operator "onnx.Mul"(%6515, %6518) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %6520 = torch.operator "onnx.MatMul"(%6517, %6519) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %6521 = torch.operator "onnx.Softmax"(%6520) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %6522 = 
torch.operator "onnx.MatMul"(%6521, %6414) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %6523 = torch.operator "onnx.Transpose"(%6522) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %6524 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_46_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6525 = torch.operator "onnx.Mul"(%6299, %6524) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6526 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11068_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6527 = torch.operator "onnx.Unsqueeze"(%6286, %6526) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6528 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6529 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11071_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6530 = torch.operator "onnx.Unsqueeze"(%6525, %6529) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6531 = torch.operator "onnx.Concat"(%6527, %6528, %6530) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %6532 = torch.operator "onnx.Reshape"(%6523, %6531) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %6533 = torch.operator "onnx.Cast"(%6532) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %6534 = torch.operator "onnx.Shape"(%6283) : (!torch.vtensor<[?,512,3072],bf16>) -> 
!torch.vtensor<[3],si64> %6535 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6536 = torch.operator "onnx.Gather"(%6534, %6535) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6537 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6538 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6539 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6540 = torch.operator "onnx.Unsqueeze"(%6536, %6539) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6541 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6542 = torch.operator "onnx.Slice"(%6533, %6538, %6540, %6537, %6541) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %6543 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6544 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6545 = torch.operator "onnx.Unsqueeze"(%6536, %6544) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6546 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6547 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6548 = torch.operator "onnx.Slice"(%6533, %6545, %6546, %6543, %6547) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %6549 = torch.operator "onnx.MatMul"(%6548, %891) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6550 = torch.operator "onnx.Add"(%266, %6549) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6551 = torch.operator "onnx.MatMul"(%6542, %892) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6552 = torch.operator "onnx.Add"(%267, %6551) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6553 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6554 = torch.operator "onnx.Unsqueeze"(%6225, %6553) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6555 = torch.operator "onnx.Mul"(%6554, %6550) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6556 = torch.operator "onnx.Add"(%6171, %6555) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.122Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.12/norm2/Constant_attr__value" : tensor<3072xbf16> %6557 = torch_c.from_builtin_tensor 
%_2Ftransformer_blocks.122Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.122Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.12/norm2/Constant_1_attr__value" : tensor<3072xbf16> %6558 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.122Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %6559 = torch.operator "onnx.LayerNormalization"(%6556, %6557, %6558) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6560 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6561 = torch.operator "onnx.Unsqueeze"(%6231, %6560) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6562 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6563 = torch.operator "onnx.Add"(%6561, %6562) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %6564 = torch.operator "onnx.Mul"(%6559, %6563) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6565 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6566 = torch.operator "onnx.Unsqueeze"(%6228, %6565) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6567 = torch.operator "onnx.Add"(%6564, %6566) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6568 = torch.operator "onnx.MatMul"(%6567, %893) : 
(!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6569 = torch.operator "onnx.Add"(%270, %6568) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6570 = torch.operator "onnx.Mul"(%6569, %6569) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6571 = torch.operator "onnx.Mul"(%6569, %6570) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6572 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_ff_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6573 = torch.operator "onnx.Mul"(%6572, %6571) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6574 = torch.operator "onnx.Add"(%6569, %6573) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6575 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6576 = torch.operator "onnx.Mul"(%6575, %6574) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6577 = torch.operator "onnx.Tanh"(%6576) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6578 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6579 = torch.operator "onnx.Add"(%6578, %6577) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6580 = torch.operator "onnx.Mul"(%6569, %6579) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> 
!torch.vtensor<[?,4096,12288],bf16> %6581 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6582 = torch.operator "onnx.Mul"(%6581, %6580) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %6583 = torch.operator "onnx.MatMul"(%6582, %894) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6584 = torch.operator "onnx.Add"(%271, %6583) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6585 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6586 = torch.operator "onnx.Unsqueeze"(%6234, %6585) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6587 = torch.operator "onnx.Mul"(%6586, %6584) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6588 = torch.operator "onnx.Add"(%6556, %6587) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6589 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6590 = torch.operator "onnx.Unsqueeze"(%6263, %6589) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6591 = torch.operator "onnx.Mul"(%6590, %6552) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %6592 = torch.operator "onnx.Add"(%6207, %6591) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.122Fnorm2_context2FConstant_attr__value 
= util.global.load @"/transformer_blocks.12/norm2_context/Constant_attr__value" : tensor<3072xbf16> %6593 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.122Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.122Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.12/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %6594 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.122Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %6595 = torch.operator "onnx.LayerNormalization"(%6592, %6593, %6594) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6596 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6597 = torch.operator "onnx.Unsqueeze"(%6269, %6596) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6598 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6599 = torch.operator "onnx.Add"(%6597, %6598) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %6600 = torch.operator "onnx.Mul"(%6595, %6599) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6601 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6602 = torch.operator "onnx.Unsqueeze"(%6266, %6601) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6603 = torch.operator "onnx.Add"(%6600, %6602) : 
(!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6604 = torch.operator "onnx.MatMul"(%6603, %895) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6605 = torch.operator "onnx.Add"(%272, %6604) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6606 = torch.operator "onnx.Mul"(%6605, %6605) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6607 = torch.operator "onnx.Mul"(%6605, %6606) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6608 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6609 = torch.operator "onnx.Mul"(%6608, %6607) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6610 = torch.operator "onnx.Add"(%6605, %6609) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6611 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6612 = torch.operator "onnx.Mul"(%6611, %6610) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6613 = torch.operator "onnx.Tanh"(%6612) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6614 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6615 = torch.operator "onnx.Add"(%6614, %6613) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> 
!torch.vtensor<[?,512,12288],bf16> %6616 = torch.operator "onnx.Mul"(%6605, %6615) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6617 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6618 = torch.operator "onnx.Mul"(%6617, %6616) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %6619 = torch.operator "onnx.MatMul"(%6618, %896) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6620 = torch.operator "onnx.Add"(%273, %6619) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6621 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.12_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6622 = torch.operator "onnx.Unsqueeze"(%6272, %6621) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6623 = torch.operator "onnx.Mul"(%6622, %6620) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6624 = torch.operator "onnx.Add"(%6592, %6623) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6625 = torch.operator "onnx.Gemm"(%1285, %274, %275) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %6626 = torch.operator "onnx.Shape"(%6625) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %6627 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_attr__value> 
: tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6628 = torch.operator "onnx.Gather"(%6626, %6627) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6629 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6630 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6631 = torch.operator "onnx.Add"(%6628, %6630) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6632 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6633 = torch.operator "onnx.Div"(%6631, %6632) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6634 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6635 = torch.operator "onnx.Mul"(%6633, %6634) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6636 = torch.operator "onnx.Slice"(%6625, %6629, %6635, %6627) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6637 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6638 = torch.operator "onnx.Mul"(%6633, %6637) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6639 = torch.operator "onnx.Slice"(%6625, %6635, %6638, %6627) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6640 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6641 = torch.operator "onnx.Mul"(%6633, %6640) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6642 = torch.operator "onnx.Slice"(%6625, %6638, %6641, %6627) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6643 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6644 = torch.operator "onnx.Mul"(%6633, %6643) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6645 = torch.operator "onnx.Slice"(%6625, %6641, %6644, %6627) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6646 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6647 = torch.operator "onnx.Mul"(%6633, %6646) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6648 = torch.operator "onnx.Slice"(%6625, %6644, %6647, %6627) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6649 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6650 = torch.operator "onnx.Mul"(%6633, %6649) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6651 = torch.operator "onnx.Slice"(%6625, 
%6647, %6650, %6627) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.132Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.13/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %6652 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.132Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.132Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.13/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %6653 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.132Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %6654 = torch.operator "onnx.LayerNormalization"(%6588, %6652, %6653) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6655 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6656 = torch.operator "onnx.Unsqueeze"(%6639, %6655) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6657 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6658 = torch.operator "onnx.Add"(%6656, %6657) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %6659 = torch.operator "onnx.Mul"(%6654, %6658) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6660 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_Constant_12_attr__value> : tensor<1xsi64>} 
: () -> !torch.vtensor<[1],si64> %6661 = torch.operator "onnx.Unsqueeze"(%6636, %6660) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6662 = torch.operator "onnx.Add"(%6659, %6661) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6663 = torch.operator "onnx.Gemm"(%1285, %276, %277) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %6664 = torch.operator "onnx.Shape"(%6663) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %6665 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6666 = torch.operator "onnx.Gather"(%6664, %6665) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6667 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6668 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6669 = torch.operator "onnx.Add"(%6666, %6668) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6670 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6671 = torch.operator "onnx.Div"(%6669, %6670) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6672 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.13_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6673 = torch.operator "onnx.Mul"(%6671, %6672) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6674 = torch.operator "onnx.Slice"(%6663, %6667, %6673, %6665) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6675 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6676 = torch.operator "onnx.Mul"(%6671, %6675) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6677 = torch.operator "onnx.Slice"(%6663, %6673, %6676, %6665) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6678 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6679 = torch.operator "onnx.Mul"(%6671, %6678) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6680 = torch.operator "onnx.Slice"(%6663, %6676, %6679, %6665) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6681 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6682 = torch.operator "onnx.Mul"(%6671, %6681) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6683 = torch.operator "onnx.Slice"(%6663, %6679, %6682, %6665) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6684 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6685 = torch.operator "onnx.Mul"(%6671, %6684) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6686 = torch.operator "onnx.Slice"(%6663, %6682, %6685, %6665) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %6687 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6688 = torch.operator "onnx.Mul"(%6671, %6687) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6689 = torch.operator "onnx.Slice"(%6663, %6685, %6688, %6665) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.132Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.13/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %6690 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.132Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.132Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.13/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %6691 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.132Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %6692 = torch.operator "onnx.LayerNormalization"(%6624, %6690, %6691) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, 
!torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6693 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6694 = torch.operator "onnx.Unsqueeze"(%6677, %6693) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6695 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %6696 = torch.operator "onnx.Add"(%6694, %6695) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %6697 = torch.operator "onnx.Mul"(%6692, %6696) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6699 = torch.operator "onnx.Unsqueeze"(%6674, %6698) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %6700 = torch.operator "onnx.Add"(%6697, %6699) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6701 = torch.operator "onnx.Shape"(%6700) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %6702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6703 = torch.operator "onnx.Gather"(%6701, %6702) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6704 = torch.operator "onnx.MatMul"(%6662, %897) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6705 = torch.operator "onnx.Add"(%280, 
%6704) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6706 = torch.operator "onnx.MatMul"(%6662, %898) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6707 = torch.operator "onnx.Add"(%281, %6706) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6708 = torch.operator "onnx.MatMul"(%6662, %899) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6709 = torch.operator "onnx.Add"(%282, %6708) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %6710 = torch.operator "onnx.Shape"(%6707) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64> %6711 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6712 = torch.operator "onnx.Gather"(%6710, %6711) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6713 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %6714 = torch.operator "onnx.Div"(%6712, %6713) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6715 = torch.operator "onnx.Cast"(%6714) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6716 = torch.operator "onnx.Cast"(%6715) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %6717 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11259_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6718 = torch.operator "onnx.Unsqueeze"(%6703, %6717) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %6719 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6720 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6721 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11263_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6722 = torch.operator "onnx.Unsqueeze"(%6716, %6721) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6723 = torch.operator "onnx.Concat"(%6718, %6719, %6720, %6722) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6724 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11266_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6725 = torch.operator "onnx.Unsqueeze"(%6703, %6724) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6726 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6727 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6728 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11270_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6729 = torch.operator "onnx.Unsqueeze"(%6716, %6728) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6730 = torch.operator "onnx.Concat"(%6725, %6726, %6727, %6729) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6731 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11273_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6732 = torch.operator "onnx.Unsqueeze"(%6703, %6731) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6733 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6734 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6735 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11277_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6736 = torch.operator "onnx.Unsqueeze"(%6716, %6735) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6737 = torch.operator "onnx.Concat"(%6732, %6733, %6734, %6736) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6738 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11280_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6739 = torch.operator "onnx.Unsqueeze"(%6703, %6738) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6740 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6741 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6742 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_11284_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6743 = torch.operator "onnx.Unsqueeze"(%6716, %6742) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6744 = torch.operator "onnx.Concat"(%6739, %6740, %6741, %6743) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6745 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11287_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6746 = torch.operator "onnx.Unsqueeze"(%6703, %6745) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6747 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6748 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6749 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11291_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6750 = torch.operator "onnx.Unsqueeze"(%6716, %6749) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6751 = torch.operator "onnx.Concat"(%6746, %6747, %6748, %6750) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6752 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11294_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6753 = torch.operator "onnx.Unsqueeze"(%6703, %6752) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6754 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.13_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6755 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6756 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11298_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6757 = torch.operator "onnx.Unsqueeze"(%6716, %6756) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6758 = torch.operator "onnx.Concat"(%6753, %6754, %6755, %6757) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6759 = torch.operator "onnx.Reshape"(%6705, %6723) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6760 = torch.operator "onnx.Transpose"(%6759) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6761 = torch.operator "onnx.Reshape"(%6707, %6730) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6762 = torch.operator "onnx.Transpose"(%6761) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6763 = torch.operator "onnx.Reshape"(%6709, %6737) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6764 = torch.operator "onnx.Transpose"(%6763) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6765 = torch.operator "onnx.Cast"(%6760) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6766 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6767 = torch.operator "onnx.Pow"(%6765, %6766) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6768 = torch.operator "onnx.ReduceMean"(%6767) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6769 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6770 = torch.operator "onnx.Add"(%6768, %6769) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6771 = torch.operator "onnx.Sqrt"(%6770) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6772 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %6773 = torch.operator "onnx.Div"(%6772, %6771) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %6774 = torch.operator "onnx.Cast"(%6760) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6775 = torch.operator "onnx.Mul"(%6774, %6773) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %6776 = torch.operator "onnx.Cast"(%6775) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %6777 = torch.operator "onnx.Mul"(%6776, %278) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %6778 = torch.operator "onnx.Cast"(%6762) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> 
!torch.vtensor<[?,?,?,?],f32>
// transformer_blocks.13 attn.norm_k (name taken from the dense_resource keys).
// Same op sequence as the norm_q chain, applied to %6762 (via the f32 cast
// %6778): Pow -> ReduceMean(axes = [-1], keepdims = 1) -> Add -> Sqrt -> Div,
// then Mul, cast to bf16 and Mul with the [128] bf16 operand %279.
// NOTE(review): constant values are stored in resource blobs not visible
// here; the RMS-style interpretation is an assumption to confirm.
// Fix: the three rank-0 constants had a bare `tensor` attribute type; restored
// to `tensor<f32>` to agree with their `!torch.vtensor<[],f32>` result types.
%6779 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%6780 = torch.operator "onnx.Pow"(%6778, %6779) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%6781 = torch.operator "onnx.ReduceMean"(%6780) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6782 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%6783 = torch.operator "onnx.Add"(%6781, %6782) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6784 = torch.operator "onnx.Sqrt"(%6783) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6785 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%6786 = torch.operator "onnx.Div"(%6785, %6784) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6787 = torch.operator "onnx.Cast"(%6762) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%6788 = torch.operator "onnx.Mul"(%6787, %6786) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%6789 = torch.operator "onnx.Cast"(%6788) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%6790 = torch.operator "onnx.Mul"(%6789, %279) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// First of three [3072,3072] projections of the [?,512,3072] tensor %6700.
%6791 = torch.operator "onnx.MatMul"(%6700, %900) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// The defining op of %6792 continues on the following line.
%6792 =
torch.operator "onnx.Add"(%285, %6791) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6793 = torch.operator "onnx.MatMul"(%6700, %901) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6794 = torch.operator "onnx.Add"(%283, %6793) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6795 = torch.operator "onnx.MatMul"(%6700, %902) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6796 = torch.operator "onnx.Add"(%284, %6795) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %6797 = torch.operator "onnx.Reshape"(%6792, %6744) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6798 = torch.operator "onnx.Transpose"(%6797) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6799 = torch.operator "onnx.Reshape"(%6794, %6751) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6800 = torch.operator "onnx.Transpose"(%6799) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6801 = torch.operator "onnx.Reshape"(%6796, %6758) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6802 = torch.operator "onnx.Transpose"(%6801) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6803 = torch.operator "onnx.Cast"(%6798) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6804 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// transformer_blocks.13 attn.norm_added_q (name taken from the dense_resource
// keys). The op above defines %6804 (its `%6804 =` is on the preceding line).
// Chain on the f32 cast %6803 of %6798:
//   Pow -> ReduceMean(axes = [-1], keepdims = 1) -> Add -> Sqrt -> Div,
// then Mul with a second f32 cast of %6798, cast to bf16 and Mul with the
// [128] bf16 operand %288.
// NOTE(review): constant values are in resource blobs not visible here; the
// RMS-style interpretation is an assumption to confirm.
// Fix: the rank-0 constants %6804, %6807 and %6810 had a bare `tensor`
// attribute type; restored to `tensor<f32>` to match their result types.
%6805 = torch.operator "onnx.Pow"(%6803, %6804) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%6806 = torch.operator "onnx.ReduceMean"(%6805) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6807 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%6808 = torch.operator "onnx.Add"(%6806, %6807) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6809 = torch.operator "onnx.Sqrt"(%6808) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6810 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%6811 = torch.operator "onnx.Div"(%6810, %6809) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6812 = torch.operator "onnx.Cast"(%6798) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%6813 = torch.operator "onnx.Mul"(%6812, %6811) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%6814 = torch.operator "onnx.Cast"(%6813) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%6815 = torch.operator "onnx.Mul"(%6814, %288) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// f32 cast of %6800, input to the next normalisation chain.
%6816 = torch.operator "onnx.Cast"(%6800) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// The attribute dictionary of %6817 continues on the following line.
%6817 = torch.operator "onnx.Constant"()
{torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// transformer_blocks.13 attn.norm_added_k (name taken from the dense_resource
// keys). The fragment above completes %6817, whose head is on the preceding line.
// Chain on the f32 cast %6816 of %6800:
//   Pow -> ReduceMean(axes = [-1], keepdims = 1) -> Add -> Sqrt -> Div,
// then Mul, cast to bf16 and Mul with the [128] bf16 operand %289.
// NOTE(review): constant values are in resource blobs not visible here; the
// RMS-style interpretation is an assumption to confirm.
// Fix: every rank-0 constant in this span had a bare `tensor` attribute type.
// Each is restored from its declared result type: `tensor<f32>` for %6817,
// %6820, %6823 and `tensor<si64>` for %6833, %6836, %6839.
%6818 = torch.operator "onnx.Pow"(%6816, %6817) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%6819 = torch.operator "onnx.ReduceMean"(%6818) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6820 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%6821 = torch.operator "onnx.Add"(%6819, %6820) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6822 = torch.operator "onnx.Sqrt"(%6821) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6823 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%6824 = torch.operator "onnx.Div"(%6823, %6822) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%6825 = torch.operator "onnx.Cast"(%6800) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%6826 = torch.operator "onnx.Mul"(%6825, %6824) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%6827 = torch.operator "onnx.Cast"(%6826) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%6828 = torch.operator "onnx.Mul"(%6827, %289) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenate along axis 2, in each case with the tensor derived from the
// [?,512,3072] input first and the one derived from the [?,4096,3072] input second.
%6829 = torch.operator "onnx.Concat"(%6815, %6777) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%6830 = torch.operator "onnx.Concat"(%6828, %6790) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%6831 = torch.operator "onnx.Concat"(%6802, %6764) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Read three individual dimensions of %6829 (Shape + Gather with a scalar
// index) and lift each to a 1-element vector for a later shape Concat.
// NOTE(review): the index values are in resource blobs; presumably 0, 1, 2.
%6832 = torch.operator "onnx.Shape"(%6829) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%6833 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%6834 = torch.operator "onnx.Gather"(%6832, %6833) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%6835 = torch.operator "onnx.Shape"(%6829) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%6836 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%6837 = torch.operator "onnx.Gather"(%6835, %6836) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%6838 = torch.operator "onnx.Shape"(%6829) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%6839 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%6840 = torch.operator "onnx.Gather"(%6838, %6839) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%6841 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11383_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6842 = torch.operator "onnx.Unsqueeze"(%6834, %6841) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// The defining op of %6843 continues on the following line.
%6843 = torch.operator
"onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11385_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6844 = torch.operator "onnx.Unsqueeze"(%6837, %6843) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6845 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11387_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6846 = torch.operator "onnx.Unsqueeze"(%6840, %6845) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6848 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6849 = torch.operator "onnx.Concat"(%6842, %6844, %6846, %6847, %6848) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %6850 = torch.operator "onnx.Reshape"(%6829, %6849) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6851 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %6852:2 = torch.operator "onnx.Split"(%6850, %6851) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %6853 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6854 = torch.operator "onnx.Squeeze"(%6852#0, 
%6853) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6855 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6856 = torch.operator "onnx.Squeeze"(%6852#1, %6855) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6857 = torch.operator "onnx.Neg"(%6856) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6858 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6859 = torch.operator "onnx.Unsqueeze"(%6857, %6858) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6860 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6861 = torch.operator "onnx.Unsqueeze"(%6854, %6860) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6862 = torch.operator "onnx.Concat"(%6859, %6861) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6863 = torch.operator "onnx.Shape"(%6862) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %6864 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6865 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6866 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_27_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The fragment above completes %6866, whose head is on the preceding line.
// Build a 4-element target shape from a 3-element slice of Shape(%6862) plus
// one constant, and reshape the [?,?,?,?,2] tensor %6862 back to rank 4.
%6867 = torch.operator "onnx.Slice"(%6863, %6865, %6866, %6864) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%6868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6869 = torch.operator "onnx.Concat"(%6867, %6868) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%6870 = torch.operator "onnx.Reshape"(%6862, %6869) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// In f32: %6829 * %1455 + %6870 * %1456, then cast back to bf16.
// NOTE(review): %1455 / %1456 are [1,1,4608,128] f32 tensors defined outside
// this view - presumably the rotary cos / sin tables; confirm at their
// definitions.
%6871 = torch.operator "onnx.Cast"(%6829) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%6872 = torch.operator "onnx.Mul"(%6871, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%6873 = torch.operator "onnx.Cast"(%6870) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%6874 = torch.operator "onnx.Mul"(%6873, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%6875 = torch.operator "onnx.Add"(%6872, %6874) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%6876 = torch.operator "onnx.Cast"(%6875) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// Start of the same shape bookkeeping for %6830.
// Fix: %6878 is a rank-0 si64 constant whose attribute type was truncated to a
// bare `tensor`; restored to `tensor<si64>` to match its result type.
%6877 = torch.operator "onnx.Shape"(%6830) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%6878 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
// The operand list of this Gather continues on the following line.
%6879 = torch.operator "onnx.Gather"(%6877,
%6878) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// The fragment above completes the Gather %6879, whose head is on the
// preceding line.
// Read two further individual dimensions of %6830 (Shape + Gather with a
// scalar index), then lift all three gathered scalars to 1-element vectors.
// NOTE(review): the index values are in resource blobs not visible here.
// Fix: the rank-0 index constants %6881 and %6884 had a bare `tensor`
// attribute type; restored to `tensor<si64>` to match their result types.
%6880 = torch.operator "onnx.Shape"(%6830) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%6881 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%6882 = torch.operator "onnx.Gather"(%6880, %6881) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%6883 = torch.operator "onnx.Shape"(%6830) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%6884 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%6885 = torch.operator "onnx.Gather"(%6883, %6884) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%6886 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11428_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6887 = torch.operator "onnx.Unsqueeze"(%6879, %6886) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%6888 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11430_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6889 = torch.operator "onnx.Unsqueeze"(%6882, %6888) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%6890 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11432_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6891 = torch.operator "onnx.Unsqueeze"(%6885, %6890) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// The attribute value of %6892 continues on the following line.
%6892 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.13_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6893 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6894 = torch.operator "onnx.Concat"(%6887, %6889, %6891, %6892, %6893) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %6895 = torch.operator "onnx.Reshape"(%6830, %6894) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6896 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %6897:2 = torch.operator "onnx.Split"(%6895, %6896) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %6898 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6899 = torch.operator "onnx.Squeeze"(%6897#0, %6898) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6900 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6901 = torch.operator "onnx.Squeeze"(%6897#1, %6900) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6902 = torch.operator "onnx.Neg"(%6901) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %6903 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.13_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6904 = torch.operator "onnx.Unsqueeze"(%6902, %6903) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6905 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6906 = torch.operator "onnx.Unsqueeze"(%6899, %6905) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %6907 = torch.operator "onnx.Concat"(%6904, %6906) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %6908 = torch.operator "onnx.Shape"(%6907) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %6909 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6910 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6911 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6912 = torch.operator "onnx.Slice"(%6908, %6910, %6911, %6909) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %6913 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6914 = torch.operator "onnx.Concat"(%6912, %6913) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %6915 = 
torch.operator "onnx.Reshape"(%6907, %6914) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %6916 = torch.operator "onnx.Cast"(%6830) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %6917 = torch.operator "onnx.Mul"(%6916, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6918 = torch.operator "onnx.Cast"(%6915) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %6919 = torch.operator "onnx.Mul"(%6918, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6920 = torch.operator "onnx.Add"(%6917, %6919) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %6921 = torch.operator "onnx.Cast"(%6920) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %6922 = torch.operator "onnx.Shape"(%6876) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %6923 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6924 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6925 = torch.operator "onnx.Slice"(%6922, %6923, %6924) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6926 = torch.operator "onnx.Cast"(%6925) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %6927 = torch.operator "onnx.Sqrt"(%6926) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %6928 = torch.operator "onnx.Constant"() {torch.onnx.value 
= dense_resource<__transformer_blocks.13_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
// The fragment above completes %6928, whose head is on the preceding line.
// Scale: %6930 = %6928 / f32(%6927), cast to bf16 as %6931.
// NOTE(review): %6927 is the Sqrt of a 1-element slice of Shape(%6876) and
// %6928 is a resource-backed constant - presumably 1 / sqrt(head_dim); confirm.
%6929 = torch.operator "onnx.Cast"(%6927) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32>
%6930 = torch.operator "onnx.Div"(%6928, %6929) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
%6931 = torch.operator "onnx.Cast"(%6930) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>
// Swap the last two axes of %6921, then multiply both MatMul operands by
// Sqrt(%6931) so that their product carries the full factor %6931.
%6932 = torch.operator "onnx.Transpose"(%6921) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
%6933 = torch.operator "onnx.Sqrt"(%6931) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%6934 = torch.operator "onnx.Mul"(%6876, %6933) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%6935 = torch.operator "onnx.Sqrt"(%6931) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%6936 = torch.operator "onnx.Mul"(%6932, %6935) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// [4608 x 4608] scores, Softmax over the last axis, weighted sum with %6831.
%6937 = torch.operator "onnx.MatMul"(%6934, %6936) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%6938 = torch.operator "onnx.Softmax"(%6937) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%6939 = torch.operator "onnx.MatMul"(%6938, %6831) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
// Swap axes 1 and 2 of the attention output.
%6940 = torch.operator "onnx.Transpose"(%6939) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// Fix: %6941 is a rank-0 constant whose attribute type was truncated to a bare
// `tensor`; restored to `tensor<si64>`, the element type of its result
// (`!torch.vtensor<[],si64>`, which begins the following line).
%6941 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_46_attr__value> : tensor<si64>} :
() -> !torch.vtensor<[],si64>
// The fragment above completes %6941, whose head is on the preceding line.
// Build a 3-element shape [%6703, <const %6945>, %6716 * %6941] and reshape
// the transposed attention output %6940 to rank 3.
// NOTE(review): %6703 / %6716 are scalars defined outside this view -
// presumably batch size and per-head width; confirm at their definitions.
%6942 = torch.operator "onnx.Mul"(%6716, %6941) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%6943 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11485_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6944 = torch.operator "onnx.Unsqueeze"(%6703, %6943) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%6945 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6946 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11488_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6947 = torch.operator "onnx.Unsqueeze"(%6942, %6946) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%6948 = torch.operator "onnx.Concat"(%6944, %6945, %6947) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%6949 = torch.operator "onnx.Reshape"(%6940, %6948) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
// bf16 -> bf16 cast, kept exactly as emitted by the exporter.
%6950 = torch.operator "onnx.Cast"(%6949) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
// Gather one dimension of the [?,512,3072] tensor %6700; it is used later as
// a Slice boundary.
// Fix: the rank-0 index constant %6952 had a bare `tensor` attribute type;
// restored to `tensor<si64>` to match its result type.
%6951 = torch.operator "onnx.Shape"(%6700) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%6952 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_48_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%6953 = torch.operator "onnx.Gather"(%6951, %6952) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// The attribute value of %6954 continues on the following line.
%6954 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.13_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6955 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6956 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6957 = torch.operator "onnx.Unsqueeze"(%6953, %6956) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6958 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6959 = torch.operator "onnx.Slice"(%6950, %6955, %6957, %6954, %6958) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %6960 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6961 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6962 = torch.operator "onnx.Unsqueeze"(%6953, %6961) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %6963 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6964 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %6965 = torch.operator "onnx.Slice"(%6950, %6962, %6963, %6960, %6964) : 
(!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16>
// The fragment above completes the Slice %6965, whose head is on the
// preceding line.
// Fix applied throughout this span: the rank-0 constants %6979, %6989, %6992,
// %6995 and %6998 had their attribute type truncated to a bare `tensor`; each
// is restored to `tensor<bf16>`, matching its `!torch.vtensor<[],bf16>` result.
//
// Two [3072,3072] projections with bias, one per slice of the attention output.
%6966 = torch.operator "onnx.MatMul"(%6965, %903) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%6967 = torch.operator "onnx.Add"(%286, %6966) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%6968 = torch.operator "onnx.MatMul"(%6959, %904) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%6969 = torch.operator "onnx.Add"(%287, %6968) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
// Residual update: %6973 = %6588 + unsqueeze(%6642) * %6967.
// NOTE(review): %6642 is presumably the attention gate from the modulation
// vector; it is defined outside this view.
%6970 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6971 = torch.operator "onnx.Unsqueeze"(%6642, %6970) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%6972 = torch.operator "onnx.Mul"(%6971, %6967) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%6973 = torch.operator "onnx.Add"(%6588, %6972) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// norm2: LayerNormalization over the last axis, with its two [3072] operands
// loaded from module globals.
%_2Ftransformer_blocks.132Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.13/norm2/Constant_attr__value" : tensor<3072xbf16>
%6974 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.132Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.132Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.13/norm2/Constant_1_attr__value" : tensor<3072xbf16>
%6975 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.132Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%6976 = torch.operator "onnx.LayerNormalization"(%6973, %6974, %6975) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Modulation: %6984 = %6976 * (unsqueeze(%6648) + %6979) + unsqueeze(%6645).
// NOTE(review): %6979 is presumably 1.0 (scale offset); its value is in a
// resource blob not visible here.
%6977 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6978 = torch.operator "onnx.Unsqueeze"(%6648, %6977) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%6979 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%6980 = torch.operator "onnx.Add"(%6978, %6979) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%6981 = torch.operator "onnx.Mul"(%6976, %6980) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%6982 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%6983 = torch.operator "onnx.Unsqueeze"(%6645, %6982) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%6984 = torch.operator "onnx.Add"(%6981, %6983) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Feed-forward up-projection 3072 -> 12288 with bias %290.
%6985 = torch.operator "onnx.MatMul"(%6984, %905) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6986 = torch.operator "onnx.Add"(%290, %6985) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Activation on x = %6986, as written: c3 * x * (c2 + Tanh(c1 * (x + c0 * x^3))),
// with c0..c3 = %6989, %6992, %6995, %6998.
// NOTE(review): this has the shape of the tanh approximation of GELU; the
// constant values are in resource blobs not visible here - confirm.
%6987 = torch.operator "onnx.Mul"(%6986, %6986) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6988 = torch.operator "onnx.Mul"(%6986, %6987) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6989 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%6990 = torch.operator "onnx.Mul"(%6989, %6988) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6991 = torch.operator "onnx.Add"(%6986, %6990) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6992 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%6993 = torch.operator "onnx.Mul"(%6992, %6991) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6994 = torch.operator "onnx.Tanh"(%6993) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6995 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%6996 = torch.operator "onnx.Add"(%6995, %6994) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6997 = torch.operator "onnx.Mul"(%6986, %6996) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%6998 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%6999 = torch.operator "onnx.Mul"(%6998, %6997) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Feed-forward down-projection; the type signature continues on the following line.
%7000 = torch.operator "onnx.MatMul"(%6999, %906) :
(!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7001 = torch.operator "onnx.Add"(%291, %7000) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7002 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7003 = torch.operator "onnx.Unsqueeze"(%6651, %7002) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7004 = torch.operator "onnx.Mul"(%7003, %7001) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7005 = torch.operator "onnx.Add"(%6973, %7004) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7006 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7007 = torch.operator "onnx.Unsqueeze"(%6680, %7006) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7008 = torch.operator "onnx.Mul"(%7007, %6969) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7009 = torch.operator "onnx.Add"(%6624, %7008) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.132Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.13/norm2_context/Constant_attr__value" : tensor<3072xbf16> %7010 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.132Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.132Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.13/norm2_context/Constant_1_attr__value" : 
tensor<3072xbf16>
    // The fragment above completes the `util.global.load` begun on the previous line.
    //
    // transformer_blocks.13, [?,512,3072] stream: LayerNormalization of %7009 with the
    // norm2_context scale/bias globals, then modulation:
    //   %7020 = %7012 * (unsqueeze(%6686) + %7015) + unsqueeze(%6683)
    // %7015 is a rank-0 bf16 constant held in a dense_resource blob; its value is not visible
    // in this file (presumably 1.0 -- TODO confirm).
    // Rank-0 constants are typed `tensor<bf16>` so the attribute's element type agrees with the
    // op's `!torch.vtensor<[],bf16>` result; a bare `tensor` is not a valid builtin type.
    %7011 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.132Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
    %7012 = torch.operator "onnx.LayerNormalization"(%7009, %7010, %7011) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %7013 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %7014 = torch.operator "onnx.Unsqueeze"(%6686, %7013) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %7015 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %7016 = torch.operator "onnx.Add"(%7014, %7015) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %7017 = torch.operator "onnx.Mul"(%7012, %7016) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %7018 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %7019 = torch.operator "onnx.Unsqueeze"(%6683, %7018) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %7020 = torch.operator "onnx.Add"(%7017, %7019) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // Up-projection: MatMul with %907 ([3072,12288]) followed by the [12288] bias %292.
    %7021 = torch.operator "onnx.MatMul"(%7020, %907) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7022 = torch.operator "onnx.Add"(%292, %7021) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    // Element-wise activation on x = %7022:
    //   %7035 = c3 * x * (c2 + tanh(c1 * (x + c0 * x*x*x)))
    // with c0..c3 = %7025, %7028, %7031, %7034 (resources named ff_context_net.0_Constant*).
    // The op sequence has the shape of the tanh approximation of GELU; the four scalar values
    // are opaque dense_resource blobs here -- TODO confirm.
    %7023 = torch.operator "onnx.Mul"(%7022, %7022) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7024 = torch.operator "onnx.Mul"(%7022, %7023) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7025 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %7026 = torch.operator "onnx.Mul"(%7025, %7024) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7027 = torch.operator "onnx.Add"(%7022, %7026) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7028 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %7029 = torch.operator "onnx.Mul"(%7028, %7027) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7030 = torch.operator "onnx.Tanh"(%7029) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7031 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %7032 = torch.operator "onnx.Add"(%7031, %7030) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7033 = torch.operator "onnx.Mul"(%7022, %7032) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
    %7034 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    // Head of the final scaling op; its body continues on the next line.
    %7035 = 
torch.operator "onnx.Mul"(%7034, %7033) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7036 = torch.operator "onnx.MatMul"(%7035, %908) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7037 = torch.operator "onnx.Add"(%293, %7036) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7038 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.13_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7039 = torch.operator "onnx.Unsqueeze"(%6689, %7038) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7040 = torch.operator "onnx.Mul"(%7039, %7037) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7041 = torch.operator "onnx.Add"(%7009, %7040) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7042 = torch.operator "onnx.Gemm"(%1285, %294, %295) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %7043 = torch.operator "onnx.Shape"(%7042) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %7044 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7045 = torch.operator "onnx.Gather"(%7043, %7044) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7046 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %7047 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7048 = torch.operator "onnx.Add"(%7045, %7047) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7049 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7050 = torch.operator "onnx.Div"(%7048, %7049) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7051 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7052 = torch.operator "onnx.Mul"(%7050, %7051) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7053 = torch.operator "onnx.Slice"(%7042, %7046, %7052, %7044) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7054 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7055 = torch.operator "onnx.Mul"(%7050, %7054) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7056 = torch.operator "onnx.Slice"(%7042, %7052, %7055, %7044) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7057 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7058 = torch.operator "onnx.Mul"(%7050, %7057) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> 
%7059 = torch.operator "onnx.Slice"(%7042, %7055, %7058, %7044) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7060 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7061 = torch.operator "onnx.Mul"(%7050, %7060) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7062 = torch.operator "onnx.Slice"(%7042, %7058, %7061, %7044) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7063 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7064 = torch.operator "onnx.Mul"(%7050, %7063) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7065 = torch.operator "onnx.Slice"(%7042, %7061, %7064, %7044) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7066 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7067 = torch.operator "onnx.Mul"(%7050, %7066) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7068 = torch.operator "onnx.Slice"(%7042, %7064, %7067, %7044) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.142Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.14/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %7069 = torch_c.from_builtin_tensor 
%_2Ftransformer_blocks.142Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
    // The fragment above completes `%7069 = torch_c.from_builtin_tensor`, begun on the previous line.
    //
    // transformer_blocks.14 norm1, [?,4096,3072] stream: LayerNormalization of %7005 using the
    // two [3072] globals loaded from /transformer_blocks.14/norm1/norm/, then modulation with
    // two slices of the Gemm output %7042:
    //   %7079 = %7071 * (unsqueeze(%7056) + %7074) + unsqueeze(%7053)
    // %7074 is a rank-0 bf16 constant held in a dense_resource blob; its value is not visible
    // in this file (presumably 1.0 -- TODO confirm).
    // Rank-0 constants are typed `tensor<bf16>` so the attribute's element type agrees with the
    // op's `!torch.vtensor<[],bf16>` result; a bare `tensor` is not a valid builtin type.
    %_2Ftransformer_blocks.142Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.14/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
    %7070 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.142Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
    %7071 = torch.operator "onnx.LayerNormalization"(%7005, %7069, %7070) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %7072 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %7073 = torch.operator "onnx.Unsqueeze"(%7056, %7072) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %7074 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %7075 = torch.operator "onnx.Add"(%7073, %7074) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %7076 = torch.operator "onnx.Mul"(%7071, %7075) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %7077 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %7078 = torch.operator "onnx.Unsqueeze"(%7053, %7077) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %7079 = torch.operator "onnx.Add"(%7076, %7078) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // Head of the next op (the norm1_context Gemm); its body continues on the next line.
    %7080 = torch.operator 
"onnx.Gemm"(%1285, %296, %297) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %7081 = torch.operator "onnx.Shape"(%7080) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %7082 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7083 = torch.operator "onnx.Gather"(%7081, %7082) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7084 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7085 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7086 = torch.operator "onnx.Add"(%7083, %7085) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7087 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7088 = torch.operator "onnx.Div"(%7086, %7087) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7089 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7090 = torch.operator "onnx.Mul"(%7088, %7089) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7091 = torch.operator "onnx.Slice"(%7080, %7084, %7090, %7082) : (!torch.vtensor<[1,18432],bf16>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7092 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7093 = torch.operator "onnx.Mul"(%7088, %7092) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7094 = torch.operator "onnx.Slice"(%7080, %7090, %7093, %7082) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7095 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7096 = torch.operator "onnx.Mul"(%7088, %7095) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7097 = torch.operator "onnx.Slice"(%7080, %7093, %7096, %7082) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7098 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7099 = torch.operator "onnx.Mul"(%7088, %7098) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7100 = torch.operator "onnx.Slice"(%7080, %7096, %7099, %7082) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7101 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7102 = torch.operator "onnx.Mul"(%7088, %7101) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %7103 = torch.operator "onnx.Slice"(%7080, %7099, %7102, %7082) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7104 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7105 = torch.operator "onnx.Mul"(%7088, %7104) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7106 = torch.operator "onnx.Slice"(%7080, %7102, %7105, %7082) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.142Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.14/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %7107 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.142Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.142Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.14/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %7108 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.142Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7109 = torch.operator "onnx.LayerNormalization"(%7041, %7107, %7108) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7110 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7111 = torch.operator "onnx.Unsqueeze"(%7094, %7110) : (!torch.vtensor<[?,?],bf16>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    // The fragment above completes `%7111 = torch.operator "onnx.Unsqueeze"(%7094, %7110)`.
    //
    // transformer_blocks.14 norm1_context, [?,512,3072] stream: modulation of the
    // LayerNormalization result %7109 with two slices of the Gemm output %7080:
    //   %7117 = %7109 * (unsqueeze(%7094) + %7112) + unsqueeze(%7091)
    // %7112 is a rank-0 bf16 constant held in a dense_resource blob; its value is not visible
    // in this file (presumably 1.0 -- TODO confirm).
    // Rank-0 constants in this span are typed `tensor<bf16>` / `tensor<si64>` so the attribute's
    // element type agrees with the op's `!torch.vtensor<[],...>` result; a bare `tensor` is not
    // a valid builtin type.
    %7112 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %7113 = torch.operator "onnx.Add"(%7111, %7112) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %7114 = torch.operator "onnx.Mul"(%7109, %7113) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    %7115 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %7116 = torch.operator "onnx.Unsqueeze"(%7091, %7115) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %7117 = torch.operator "onnx.Add"(%7114, %7116) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
    // %7120: one entry of shape(%7117), selected along axis 0 by the scalar index %7119
    // (index value is an opaque resource). It is reused below as the leading element of the
    // Reshape target shapes.
    %7118 = torch.operator "onnx.Shape"(%7117) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
    %7119 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %7120 = torch.operator "onnx.Gather"(%7118, %7119) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // Three [3072,3072] projections of %7079, each followed by a [3072] bias add.
    // Later in this function %7122 feeds the norm_q-named sequence and %7124 the norm_k-named
    // one; %7126 is presumably the value projection -- TODO confirm.
    %7121 = torch.operator "onnx.MatMul"(%7079, %909) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %7122 = torch.operator "onnx.Add"(%300, %7121) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %7123 = torch.operator "onnx.MatMul"(%7079, %910) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %7124 = torch.operator "onnx.Add"(%301, %7123) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %7125 = torch.operator "onnx.MatMul"(%7079, %911) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    %7126 = torch.operator "onnx.Add"(%302, %7125) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
    // %7133: one entry of shape(%7124) divided by the scalar %7130 (both index and divisor are
    // opaque resources; presumably hidden size / head count -- TODO confirm).
    // The two Casts use `to = 7` and are si64 -> si64 per their types, i.e. no-ops kept from the export.
    %7127 = torch.operator "onnx.Shape"(%7124) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
    %7128 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %7129 = torch.operator "onnx.Gather"(%7127, %7128) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %7130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %7131 = torch.operator "onnx.Div"(%7129, %7130) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %7132 = torch.operator "onnx.Cast"(%7131) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %7133 = torch.operator "onnx.Cast"(%7132) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // First pieces of a 4-element Reshape target: %7135 is %7120 lifted to shape [1].
    %7134 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11676_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %7135 = torch.operator "onnx.Unsqueeze"(%7120, %7134) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %7136 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    // Head of the next constant; its resource reference continues on the next line.
    %7137 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.14_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7138 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11680_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7139 = torch.operator "onnx.Unsqueeze"(%7133, %7138) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7140 = torch.operator "onnx.Concat"(%7135, %7136, %7137, %7139) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7141 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11683_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7142 = torch.operator "onnx.Unsqueeze"(%7120, %7141) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7143 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7145 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11687_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7146 = torch.operator "onnx.Unsqueeze"(%7133, %7145) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7147 = torch.operator "onnx.Concat"(%7142, %7143, %7144, %7146) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7148 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11690_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7149 = torch.operator 
"onnx.Unsqueeze"(%7120, %7148) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7150 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7151 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7152 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11694_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7153 = torch.operator "onnx.Unsqueeze"(%7133, %7152) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7154 = torch.operator "onnx.Concat"(%7149, %7150, %7151, %7153) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7155 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11697_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7156 = torch.operator "onnx.Unsqueeze"(%7120, %7155) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7157 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7158 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7159 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11701_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7160 = torch.operator "onnx.Unsqueeze"(%7133, %7159) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7161 = torch.operator "onnx.Concat"(%7156, %7157, %7158, 
%7160) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7162 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11704_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7163 = torch.operator "onnx.Unsqueeze"(%7120, %7162) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7164 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7165 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7166 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11708_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7167 = torch.operator "onnx.Unsqueeze"(%7133, %7166) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7168 = torch.operator "onnx.Concat"(%7163, %7164, %7165, %7167) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7169 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11711_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7170 = torch.operator "onnx.Unsqueeze"(%7120, %7169) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7171 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7172 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%7173 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11715_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7174 = torch.operator "onnx.Unsqueeze"(%7133, %7173) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7175 = torch.operator "onnx.Concat"(%7170, %7171, %7172, %7174) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7176 = torch.operator "onnx.Reshape"(%7122, %7140) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7177 = torch.operator "onnx.Transpose"(%7176) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7178 = torch.operator "onnx.Reshape"(%7124, %7147) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7179 = torch.operator "onnx.Transpose"(%7178) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7180 = torch.operator "onnx.Reshape"(%7126, %7154) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7181 = torch.operator "onnx.Transpose"(%7180) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7182 = torch.operator "onnx.Cast"(%7177) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7183 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %7184 = torch.operator "onnx.Pow"(%7182, %7183) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
// norm_q chain (name from the resource identifiers), computed in f32:
//   %7190 = c2 / sqrt(mean(%7184, axis = -1, keepdims) + c1)
//   %7194 = bf16(f32(%7177) * %7190) * %298
// c1 / c2 are scalar resources whose values are not visible here (presumably epsilon and 1.0 -- confirm).
// Repaired: rank-0 constant attribute types had lost their element type (`tensor` -> `tensor<f32>`),
// restored from each op's result type.
%7185 = torch.operator "onnx.ReduceMean"(%7184) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7186 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7187 = torch.operator "onnx.Add"(%7185, %7186) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7188 = torch.operator "onnx.Sqrt"(%7187) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7189 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7190 = torch.operator "onnx.Div"(%7189, %7188) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7191 = torch.operator "onnx.Cast"(%7177) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%7192 = torch.operator "onnx.Mul"(%7191, %7190) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%7193 = torch.operator "onnx.Cast"(%7192) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%7194 = torch.operator "onnx.Mul"(%7193, %298) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Start of the norm_k chain: same op sequence applied to %7179.
%7195 = torch.operator "onnx.Cast"(%7179) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%7196 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7197 = torch.operator "onnx.Pow"(%7195, %7196) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) ->
!torch.vtensor<[?,?,?,?],f32>
// norm_k chain (name from the resource identifiers), computed in f32:
//   %7203 = c2 / sqrt(mean(%7197, axis = -1, keepdims) + c1)
//   %7207 = bf16(f32(%7179) * %7203) * %299
// Repaired: rank-0 constant attribute types had lost their element type (`tensor` -> `tensor<f32>`),
// restored from each op's result type.
%7198 = torch.operator "onnx.ReduceMean"(%7197) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7199 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7200 = torch.operator "onnx.Add"(%7198, %7199) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7201 = torch.operator "onnx.Sqrt"(%7200) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7202 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7203 = torch.operator "onnx.Div"(%7202, %7201) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7204 = torch.operator "onnx.Cast"(%7179) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%7205 = torch.operator "onnx.Mul"(%7204, %7203) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%7206 = torch.operator "onnx.Cast"(%7205) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%7207 = torch.operator "onnx.Mul"(%7206, %299) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Linear projections of %7117: MatMul with a 3072x3072 weight followed by a bias Add.
%7208 = torch.operator "onnx.MatMul"(%7117, %912) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%7209 = torch.operator "onnx.Add"(%305, %7208) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%7210 = torch.operator "onnx.MatMul"(%7117, %913) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%7211 =
torch.operator "onnx.Add"(%303, %7210) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Third projection of %7117, then Reshape + Transpose(perm = [0, 2, 1, 3]) of the three projected
// tensors using the shape vectors %7161 / %7168 / %7175.
%7212 = torch.operator "onnx.MatMul"(%7117, %914) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%7213 = torch.operator "onnx.Add"(%304, %7212) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%7214 = torch.operator "onnx.Reshape"(%7209, %7161) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%7215 = torch.operator "onnx.Transpose"(%7214) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%7216 = torch.operator "onnx.Reshape"(%7211, %7168) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%7217 = torch.operator "onnx.Transpose"(%7216) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%7218 = torch.operator "onnx.Reshape"(%7213, %7175) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%7219 = torch.operator "onnx.Transpose"(%7218) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Start of the norm_added_q chain (name from the resource identifiers): %7215 is cast to f32 (to = 1).
%7220 = torch.operator "onnx.Cast"(%7215) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// Repaired: the rank-0 attribute type had lost its element type (`tensor` -> `tensor<f32>`),
// restored from the op's result type.
%7221 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7222 = torch.operator "onnx.Pow"(%7220, %7221) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%7223
= torch.operator "onnx.ReduceMean"(%7222) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// norm_added_q chain (name from the resource identifiers), computed in f32:
//   %7228 = c2 / sqrt(%7223 + c1)
//   %7232 = bf16(f32(%7215) * %7228) * %308
// Repaired: rank-0 constant attribute types had lost their element type (`tensor` -> `tensor<f32>`),
// restored from each op's result type.
%7224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7225 = torch.operator "onnx.Add"(%7223, %7224) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7226 = torch.operator "onnx.Sqrt"(%7225) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7227 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7228 = torch.operator "onnx.Div"(%7227, %7226) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7229 = torch.operator "onnx.Cast"(%7215) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%7230 = torch.operator "onnx.Mul"(%7229, %7228) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%7231 = torch.operator "onnx.Cast"(%7230) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%7232 = torch.operator "onnx.Mul"(%7231, %308) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Start of the norm_added_k chain: same op sequence applied to %7217.
%7233 = torch.operator "onnx.Cast"(%7217) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%7234 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7235 = torch.operator "onnx.Pow"(%7233, %7234) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%7236 = torch.operator "onnx.ReduceMean"(%7235)
{torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// norm_added_k chain (name from the resource identifiers), computed in f32:
//   %7241 = c2 / sqrt(%7236 + c1)
//   %7245 = bf16(f32(%7217) * %7241) * %309
// Repaired: rank-0 constant attribute types had lost their element type (`tensor` -> `tensor<f32>`),
// restored from each op's result type.
%7237 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7238 = torch.operator "onnx.Add"(%7236, %7237) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7239 = torch.operator "onnx.Sqrt"(%7238) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7240 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%7241 = torch.operator "onnx.Div"(%7240, %7239) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%7242 = torch.operator "onnx.Cast"(%7217) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%7243 = torch.operator "onnx.Mul"(%7242, %7241) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%7244 = torch.operator "onnx.Cast"(%7243) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%7245 = torch.operator "onnx.Mul"(%7244, %309) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenate along axis 2, with the tensor derived from %7117 first and the one derived from
// %7122 / %7124 / %7126 second.
%7246 = torch.operator "onnx.Concat"(%7232, %7194) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%7247 = torch.operator "onnx.Concat"(%7245, %7207) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%7248 = torch.operator "onnx.Concat"(%7219, %7181) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) ->
!torch.vtensor<[?,?,?,?],bf16>
// Reads three entries of Shape(%7246) via Gather (indices come from scalar resources not visible
// here) and unsqueezes them to rank 1, for use in a later Concat that builds a reshape target.
// Repaired: rank-0 index constant attribute types had lost their element type
// (`tensor` -> `tensor<si64>`), restored from each op's result type.
%7249 = torch.operator "onnx.Shape"(%7246) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%7250 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%7251 = torch.operator "onnx.Gather"(%7249, %7250) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%7252 = torch.operator "onnx.Shape"(%7246) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%7253 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%7254 = torch.operator "onnx.Gather"(%7252, %7253) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%7255 = torch.operator "onnx.Shape"(%7246) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%7256 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%7257 = torch.operator "onnx.Gather"(%7255, %7256) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%7258 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11800_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7259 = torch.operator "onnx.Unsqueeze"(%7251, %7258) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%7260 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11802_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7261 = torch.operator "onnx.Unsqueeze"(%7254, %7260) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%7262 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<_Constant_11804_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7263 = torch.operator "onnx.Unsqueeze"(%7257, %7262) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7265 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7266 = torch.operator "onnx.Concat"(%7259, %7261, %7263, %7264, %7265) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %7267 = torch.operator "onnx.Reshape"(%7246, %7266) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %7268 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %7269:2 = torch.operator "onnx.Split"(%7267, %7268) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %7270 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7271 = torch.operator "onnx.Squeeze"(%7269#0, %7270) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7272 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7273 = torch.operator "onnx.Squeeze"(%7269#1, 
%7272) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7274 = torch.operator "onnx.Neg"(%7273) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7275 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7276 = torch.operator "onnx.Unsqueeze"(%7274, %7275) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %7277 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7278 = torch.operator "onnx.Unsqueeze"(%7271, %7277) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %7279 = torch.operator "onnx.Concat"(%7276, %7278) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %7280 = torch.operator "onnx.Shape"(%7279) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %7281 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7282 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7283 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7284 = torch.operator "onnx.Slice"(%7280, %7282, %7283, %7281) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %7285 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.14_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// Reshape %7279 back to rank 4, then combine in f32:
//   %7293 = bf16(f32(%7246) * %1455 + f32(%7287) * %1456)
// NOTE(review): this looks like a rotary-embedding application with %1455 / %1456 as the
// cos / sin tables -- confirm against where those values are defined.
%7286 = torch.operator "onnx.Concat"(%7284, %7285) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%7287 = torch.operator "onnx.Reshape"(%7279, %7286) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%7288 = torch.operator "onnx.Cast"(%7246) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%7289 = torch.operator "onnx.Mul"(%7288, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%7290 = torch.operator "onnx.Cast"(%7287) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%7291 = torch.operator "onnx.Mul"(%7290, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%7292 = torch.operator "onnx.Add"(%7289, %7291) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%7293 = torch.operator "onnx.Cast"(%7292) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// Same shape-gathering sequence, now on %7247.
%7294 = torch.operator "onnx.Shape"(%7247) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
// Repaired: the rank-0 attribute type had lost its element type (`tensor` -> `tensor<si64>`),
// restored from the op's result type.
%7295 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%7296 = torch.operator "onnx.Gather"(%7294, %7295) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%7297 = torch.operator "onnx.Shape"(%7247) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%7298 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.14_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
// Gathers entries of Shape(%7247) and unsqueezes them to rank 1; together with two rank-1
// constants they are the operands of the Concat that starts at the end of this line (%7311).
// Repaired: rank-0 index constant attribute types (the %7298 continuation above, and %7301) had
// lost their element type (`tensor` -> `tensor<si64>`), restored from each op's result type.
%7299 = torch.operator "onnx.Gather"(%7297, %7298) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%7300 = torch.operator "onnx.Shape"(%7247) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%7301 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%7302 = torch.operator "onnx.Gather"(%7300, %7301) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%7303 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11845_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7304 = torch.operator "onnx.Unsqueeze"(%7296, %7303) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%7305 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11847_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7306 = torch.operator "onnx.Unsqueeze"(%7299, %7305) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%7307 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11849_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7308 = torch.operator "onnx.Unsqueeze"(%7302, %7307) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%7309 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7310 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7311 = torch.operator "onnx.Concat"(%7304,
%7306, %7308, %7309, %7310) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %7312 = torch.operator "onnx.Reshape"(%7247, %7311) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %7313 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %7314:2 = torch.operator "onnx.Split"(%7312, %7313) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %7315 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7316 = torch.operator "onnx.Squeeze"(%7314#0, %7315) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7317 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7318 = torch.operator "onnx.Squeeze"(%7314#1, %7317) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7319 = torch.operator "onnx.Neg"(%7318) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7320 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7321 = torch.operator "onnx.Unsqueeze"(%7319, %7320) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %7322 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.14_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7323 = torch.operator "onnx.Unsqueeze"(%7316, %7322) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %7324 = torch.operator "onnx.Concat"(%7321, %7323) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %7325 = torch.operator "onnx.Shape"(%7324) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %7326 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7327 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7328 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7329 = torch.operator "onnx.Slice"(%7325, %7327, %7328, %7326) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %7330 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7331 = torch.operator "onnx.Concat"(%7329, %7330) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7332 = torch.operator "onnx.Reshape"(%7324, %7331) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7333 = torch.operator "onnx.Cast"(%7247) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %7334 = torch.operator 
"onnx.Mul"(%7333, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7335 = torch.operator "onnx.Cast"(%7332) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7336 = torch.operator "onnx.Mul"(%7335, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7337 = torch.operator "onnx.Add"(%7334, %7336) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7338 = torch.operator "onnx.Cast"(%7337) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %7339 = torch.operator "onnx.Shape"(%7293) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %7340 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7341 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7342 = torch.operator "onnx.Slice"(%7339, %7340, %7341) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7343 = torch.operator "onnx.Cast"(%7342) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %7344 = torch.operator "onnx.Sqrt"(%7343) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %7345 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %7346 = torch.operator "onnx.Cast"(%7344) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %7347 = torch.operator "onnx.Div"(%7345, %7346) : (!torch.vtensor<[1],f32>, 
!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
// Attention core: %7293 and the transposed %7338 are each multiplied by Sqrt(%7348) before the
// MatMul, followed by Softmax over the last axis, MatMul with %7248, and
// Transpose(perm = [0, 2, 1, 3]).
%7348 = torch.operator "onnx.Cast"(%7347) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>
%7349 = torch.operator "onnx.Transpose"(%7338) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
%7350 = torch.operator "onnx.Sqrt"(%7348) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%7351 = torch.operator "onnx.Mul"(%7293, %7350) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%7352 = torch.operator "onnx.Sqrt"(%7348) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%7353 = torch.operator "onnx.Mul"(%7349, %7352) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
%7354 = torch.operator "onnx.MatMul"(%7351, %7353) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%7355 = torch.operator "onnx.Softmax"(%7354) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%7356 = torch.operator "onnx.MatMul"(%7355, %7248) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
%7357 = torch.operator "onnx.Transpose"(%7356) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// Repaired: the rank-0 attribute type had lost its element type (`tensor` -> `tensor<si64>`),
// restored from the op's result type.
%7358 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_46_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%7359 = torch.operator "onnx.Mul"(%7133, %7358) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%7360 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11902_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7361
= torch.operator "onnx.Unsqueeze"(%7120, %7360) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// Builds the 3-element shape vector %7365 and reshapes the attention output %7357 to rank 3.
%7362 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7363 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_11905_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7364 = torch.operator "onnx.Unsqueeze"(%7359, %7363) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%7365 = torch.operator "onnx.Concat"(%7361, %7362, %7364) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%7366 = torch.operator "onnx.Reshape"(%7357, %7365) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
// bf16 -> bf16 cast (to = 16); input and result types are identical.
%7367 = torch.operator "onnx.Cast"(%7366) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
// One entry of Shape(%7117) is gathered as %7370 and used as a Slice bound on later lines.
%7368 = torch.operator "onnx.Shape"(%7117) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
// Repaired: the rank-0 attribute type had lost its element type (`tensor` -> `tensor<si64>`),
// restored from the op's result type.
%7369 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_48_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%7370 = torch.operator "onnx.Gather"(%7368, %7369) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%7371 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7372 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%7373 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.14_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7374 = torch.operator "onnx.Unsqueeze"(%7370, %7373) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7375 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7376 = torch.operator "onnx.Slice"(%7367, %7372, %7374, %7371, %7375) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %7377 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7378 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7379 = torch.operator "onnx.Unsqueeze"(%7370, %7378) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7380 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7381 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7382 = torch.operator "onnx.Slice"(%7367, %7379, %7380, %7377, %7381) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %7383 = torch.operator "onnx.MatMul"(%7382, %915) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7384 = torch.operator "onnx.Add"(%306, %7383) : 
(!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7385 = torch.operator "onnx.MatMul"(%7376, %916) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7386 = torch.operator "onnx.Add"(%307, %7385) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7387 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7388 = torch.operator "onnx.Unsqueeze"(%7059, %7387) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7389 = torch.operator "onnx.Mul"(%7388, %7384) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7390 = torch.operator "onnx.Add"(%7005, %7389) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.142Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.14/norm2/Constant_attr__value" : tensor<3072xbf16> %7391 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.142Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.142Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.14/norm2/Constant_1_attr__value" : tensor<3072xbf16> %7392 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.142Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7393 = torch.operator "onnx.LayerNormalization"(%7390, %7391, %7392) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7394 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.14_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7395 = torch.operator "onnx.Unsqueeze"(%7065, %7394) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7396 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7397 = torch.operator "onnx.Add"(%7395, %7396) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %7398 = torch.operator "onnx.Mul"(%7393, %7397) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7399 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7400 = torch.operator "onnx.Unsqueeze"(%7062, %7399) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7401 = torch.operator "onnx.Add"(%7398, %7400) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7402 = torch.operator "onnx.MatMul"(%7401, %917) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7403 = torch.operator "onnx.Add"(%310, %7402) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7404 = torch.operator "onnx.Mul"(%7403, %7403) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7405 = torch.operator "onnx.Mul"(%7403, %7404) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7406 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_ff_net.0_Constant_attr__value> : tensor} : () -> 
!torch.vtensor<[],bf16> %7407 = torch.operator "onnx.Mul"(%7406, %7405) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7408 = torch.operator "onnx.Add"(%7403, %7407) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7410 = torch.operator "onnx.Mul"(%7409, %7408) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7411 = torch.operator "onnx.Tanh"(%7410) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7412 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7413 = torch.operator "onnx.Add"(%7412, %7411) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7414 = torch.operator "onnx.Mul"(%7403, %7413) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7415 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7416 = torch.operator "onnx.Mul"(%7415, %7414) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7417 = torch.operator "onnx.MatMul"(%7416, %918) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7418 = torch.operator "onnx.Add"(%311, %7417) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7419 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.14_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7420 = torch.operator "onnx.Unsqueeze"(%7068, %7419) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7421 = torch.operator "onnx.Mul"(%7420, %7418) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7422 = torch.operator "onnx.Add"(%7390, %7421) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7423 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7424 = torch.operator "onnx.Unsqueeze"(%7097, %7423) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7425 = torch.operator "onnx.Mul"(%7424, %7386) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7426 = torch.operator "onnx.Add"(%7041, %7425) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.142Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.14/norm2_context/Constant_attr__value" : tensor<3072xbf16> %7427 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.142Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.142Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.14/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %7428 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.142Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7429 = torch.operator "onnx.LayerNormalization"(%7426, %7427, %7428) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : 
(!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7430 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7431 = torch.operator "onnx.Unsqueeze"(%7103, %7430) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7432 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7433 = torch.operator "onnx.Add"(%7431, %7432) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %7434 = torch.operator "onnx.Mul"(%7429, %7433) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7435 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7436 = torch.operator "onnx.Unsqueeze"(%7100, %7435) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7437 = torch.operator "onnx.Add"(%7434, %7436) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7438 = torch.operator "onnx.MatMul"(%7437, %919) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7439 = torch.operator "onnx.Add"(%312, %7438) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7440 = torch.operator "onnx.Mul"(%7439, %7439) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7441 = torch.operator "onnx.Mul"(%7439, %7440) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> 
!torch.vtensor<[?,512,12288],bf16> %7442 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7443 = torch.operator "onnx.Mul"(%7442, %7441) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7444 = torch.operator "onnx.Add"(%7439, %7443) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7445 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7446 = torch.operator "onnx.Mul"(%7445, %7444) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7447 = torch.operator "onnx.Tanh"(%7446) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7448 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7449 = torch.operator "onnx.Add"(%7448, %7447) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7450 = torch.operator "onnx.Mul"(%7439, %7449) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7451 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7452 = torch.operator "onnx.Mul"(%7451, %7450) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7453 = torch.operator "onnx.MatMul"(%7452, %920) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7454 = torch.operator 
"onnx.Add"(%313, %7453) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7455 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.14_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7456 = torch.operator "onnx.Unsqueeze"(%7106, %7455) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7457 = torch.operator "onnx.Mul"(%7456, %7454) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7458 = torch.operator "onnx.Add"(%7426, %7457) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7459 = torch.operator "onnx.Gemm"(%1285, %314, %315) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %7460 = torch.operator "onnx.Shape"(%7459) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %7461 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7462 = torch.operator "onnx.Gather"(%7460, %7461) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7463 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7464 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7465 = torch.operator "onnx.Add"(%7462, %7464) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7466 
= torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7467 = torch.operator "onnx.Div"(%7465, %7466) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7469 = torch.operator "onnx.Mul"(%7467, %7468) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7470 = torch.operator "onnx.Slice"(%7459, %7463, %7469, %7461) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7471 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7472 = torch.operator "onnx.Mul"(%7467, %7471) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7473 = torch.operator "onnx.Slice"(%7459, %7469, %7472, %7461) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7474 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7475 = torch.operator "onnx.Mul"(%7467, %7474) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7476 = torch.operator "onnx.Slice"(%7459, %7472, %7475, %7461) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7477 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_7_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7478 = torch.operator "onnx.Mul"(%7467, %7477) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7479 = torch.operator "onnx.Slice"(%7459, %7475, %7478, %7461) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7480 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7481 = torch.operator "onnx.Mul"(%7467, %7480) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7482 = torch.operator "onnx.Slice"(%7459, %7478, %7481, %7461) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7483 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7484 = torch.operator "onnx.Mul"(%7467, %7483) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7485 = torch.operator "onnx.Slice"(%7459, %7481, %7484, %7461) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.152Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.15/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %7486 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.152Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.152Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.15/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %7487 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.152Fnorm12Fnorm2FConstant_1_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7488 = torch.operator "onnx.LayerNormalization"(%7422, %7486, %7487) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7489 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7490 = torch.operator "onnx.Unsqueeze"(%7473, %7489) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7491 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7492 = torch.operator "onnx.Add"(%7490, %7491) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %7493 = torch.operator "onnx.Mul"(%7488, %7492) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7494 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7495 = torch.operator "onnx.Unsqueeze"(%7470, %7494) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7496 = torch.operator "onnx.Add"(%7493, %7495) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7497 = torch.operator "onnx.Gemm"(%1285, %316, %317) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %7498 = torch.operator "onnx.Shape"(%7497) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> 
%7499 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7500 = torch.operator "onnx.Gather"(%7498, %7499) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7501 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7502 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7503 = torch.operator "onnx.Add"(%7500, %7502) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7504 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7505 = torch.operator "onnx.Div"(%7503, %7504) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7506 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7507 = torch.operator "onnx.Mul"(%7505, %7506) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7508 = torch.operator "onnx.Slice"(%7497, %7501, %7507, %7499) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7509 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7510 = torch.operator "onnx.Mul"(%7505, %7509) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) 
-> !torch.vtensor<[1],si64> %7511 = torch.operator "onnx.Slice"(%7497, %7507, %7510, %7499) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7512 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7513 = torch.operator "onnx.Mul"(%7505, %7512) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7514 = torch.operator "onnx.Slice"(%7497, %7510, %7513, %7499) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7515 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7516 = torch.operator "onnx.Mul"(%7505, %7515) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7517 = torch.operator "onnx.Slice"(%7497, %7513, %7516, %7499) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7518 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7519 = torch.operator "onnx.Mul"(%7505, %7518) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7520 = torch.operator "onnx.Slice"(%7497, %7516, %7519, %7499) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7521 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %7522 = torch.operator "onnx.Mul"(%7505, %7521) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7523 = torch.operator "onnx.Slice"(%7497, %7519, %7522, %7499) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.152Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.15/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %7524 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.152Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.152Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.15/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %7525 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.152Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7526 = torch.operator "onnx.LayerNormalization"(%7458, %7524, %7525) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7527 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7528 = torch.operator "onnx.Unsqueeze"(%7511, %7527) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7529 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7530 = torch.operator "onnx.Add"(%7528, %7529) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %7531 = torch.operator "onnx.Mul"(%7526, 
%7530) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7533 = torch.operator "onnx.Unsqueeze"(%7508, %7532) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7534 = torch.operator "onnx.Add"(%7531, %7533) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7535 = torch.operator "onnx.Shape"(%7534) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %7536 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %7537 = torch.operator "onnx.Gather"(%7535, %7536) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7538 = torch.operator "onnx.MatMul"(%7496, %921) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7539 = torch.operator "onnx.Add"(%320, %7538) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7540 = torch.operator "onnx.MatMul"(%7496, %922) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7541 = torch.operator "onnx.Add"(%321, %7540) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7542 = torch.operator "onnx.MatMul"(%7496, %923) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7543 = torch.operator "onnx.Add"(%322, %7542) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7544 = 
torch.operator "onnx.Shape"(%7541) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64> %7545 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %7546 = torch.operator "onnx.Gather"(%7544, %7545) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7547 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %7548 = torch.operator "onnx.Div"(%7546, %7547) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7549 = torch.operator "onnx.Cast"(%7548) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7550 = torch.operator "onnx.Cast"(%7549) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7551 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12093_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7552 = torch.operator "onnx.Unsqueeze"(%7537, %7551) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7553 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7554 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7555 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12097_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7556 = torch.operator "onnx.Unsqueeze"(%7550, %7555) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7557 = torch.operator "onnx.Concat"(%7552, %7553, %7554, %7556) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7558 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12100_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7559 = torch.operator "onnx.Unsqueeze"(%7537, %7558) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7560 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7561 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7562 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12104_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7563 = torch.operator "onnx.Unsqueeze"(%7550, %7562) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7564 = torch.operator "onnx.Concat"(%7559, %7560, %7561, %7563) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7565 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12107_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7566 = torch.operator "onnx.Unsqueeze"(%7537, %7565) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7567 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7568 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7569 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12111_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7570 = torch.operator "onnx.Unsqueeze"(%7550, %7569) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7571 = torch.operator "onnx.Concat"(%7566, %7567, %7568, %7570) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7572 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12114_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7573 = torch.operator "onnx.Unsqueeze"(%7537, %7572) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7574 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7575 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7576 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12118_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7577 = torch.operator "onnx.Unsqueeze"(%7550, %7576) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7578 = torch.operator "onnx.Concat"(%7573, %7574, %7575, %7577) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7579 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12121_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7580 = torch.operator "onnx.Unsqueeze"(%7537, %7579) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7581 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7582 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7583 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12125_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7584 = torch.operator "onnx.Unsqueeze"(%7550, %7583) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7585 = torch.operator "onnx.Concat"(%7580, %7581, %7582, %7584) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7586 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12128_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7587 = torch.operator "onnx.Unsqueeze"(%7537, %7586) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7588 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7589 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7590 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12132_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7591 = torch.operator "onnx.Unsqueeze"(%7550, %7590) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7592 = torch.operator "onnx.Concat"(%7587, %7588, %7589, %7591) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// transformer_blocks.15 attention, per-head normalisation (as visible in the ops below):
//   * %7539 / %7541 / %7543 are reshaped with runtime 4-element shapes and transposed with perm [0, 2, 1, 3].
//   * %7594 and %7596 are each cast to f32, raised to a scalar-constant power, mean-reduced over the last
//     axis (keepdims = 1), offset by a scalar constant and turned into const / sqrt(...); the f32 input is
//     multiplied by that factor, cast back to bf16 and scaled by a [128] bf16 weight (%318 / %319).
//   * The same op sequence is then applied to %7632 / %7634 with the weights %328 / %329.
//   NOTE(review): this looks like RMSNorm (exponent 2, epsilon, numerator 1), but the scalar values live in
//   dense_resource blobs that are not visible here - confirm before relying on it.
// Rank-0 onnx.Constant payloads are spelled tensor<f32> / tensor<si64> so that the attribute type agrees
// with the !torch.vtensor<[],f32> / !torch.vtensor<[],si64> result; a bare `tensor` has no element type.
%7593 = torch.operator "onnx.Reshape"(%7539, %7557) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7594 = torch.operator "onnx.Transpose"(%7593) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7595 = torch.operator "onnx.Reshape"(%7541, %7564) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7596 = torch.operator "onnx.Transpose"(%7595) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7597 = torch.operator "onnx.Reshape"(%7543, %7571) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7598 = torch.operator "onnx.Transpose"(%7597) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7599 = torch.operator "onnx.Cast"(%7594) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7600 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7601 = torch.operator "onnx.Pow"(%7599, %7600) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %7602 = torch.operator "onnx.ReduceMean"(%7601) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7603 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7604 = torch.operator "onnx.Add"(%7602, %7603) : 
(!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7605 = torch.operator "onnx.Sqrt"(%7604) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7606 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7607 = torch.operator "onnx.Div"(%7606, %7605) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7608 = torch.operator "onnx.Cast"(%7594) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7609 = torch.operator "onnx.Mul"(%7608, %7607) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %7610 = torch.operator "onnx.Cast"(%7609) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %7611 = torch.operator "onnx.Mul"(%7610, %318) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %7612 = torch.operator "onnx.Cast"(%7596) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7614 = torch.operator "onnx.Pow"(%7612, %7613) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %7615 = torch.operator "onnx.ReduceMean"(%7614) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7616 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7617 = torch.operator "onnx.Add"(%7615, %7616) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> 
!torch.vtensor<[?,?,?,1],f32> %7618 = torch.operator "onnx.Sqrt"(%7617) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7619 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7620 = torch.operator "onnx.Div"(%7619, %7618) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7621 = torch.operator "onnx.Cast"(%7596) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7622 = torch.operator "onnx.Mul"(%7621, %7620) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %7623 = torch.operator "onnx.Cast"(%7622) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %7624 = torch.operator "onnx.Mul"(%7623, %319) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %7625 = torch.operator "onnx.MatMul"(%7534, %924) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7626 = torch.operator "onnx.Add"(%325, %7625) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7627 = torch.operator "onnx.MatMul"(%7534, %925) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7628 = torch.operator "onnx.Add"(%323, %7627) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7629 = torch.operator "onnx.MatMul"(%7534, %926) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7630 = torch.operator "onnx.Add"(%324, %7629) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7631 = torch.operator "onnx.Reshape"(%7626, 
%7578) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7632 = torch.operator "onnx.Transpose"(%7631) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7633 = torch.operator "onnx.Reshape"(%7628, %7585) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7634 = torch.operator "onnx.Transpose"(%7633) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7635 = torch.operator "onnx.Reshape"(%7630, %7592) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7636 = torch.operator "onnx.Transpose"(%7635) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7637 = torch.operator "onnx.Cast"(%7632) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7638 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7639 = torch.operator "onnx.Pow"(%7637, %7638) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %7640 = torch.operator "onnx.ReduceMean"(%7639) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7641 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7642 = torch.operator "onnx.Add"(%7640, %7641) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> 
%7643 = torch.operator "onnx.Sqrt"(%7642) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7644 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7645 = torch.operator "onnx.Div"(%7644, %7643) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7646 = torch.operator "onnx.Cast"(%7632) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7647 = torch.operator "onnx.Mul"(%7646, %7645) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %7648 = torch.operator "onnx.Cast"(%7647) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %7649 = torch.operator "onnx.Mul"(%7648, %328) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %7650 = torch.operator "onnx.Cast"(%7634) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7651 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7652 = torch.operator "onnx.Pow"(%7650, %7651) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %7653 = torch.operator "onnx.ReduceMean"(%7652) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7654 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7655 = torch.operator "onnx.Add"(%7653, %7654) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7656 = torch.operator "onnx.Sqrt"(%7655) : 
(!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7657 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %7658 = torch.operator "onnx.Div"(%7657, %7656) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %7659 = torch.operator "onnx.Cast"(%7634) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7660 = torch.operator "onnx.Mul"(%7659, %7658) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %7661 = torch.operator "onnx.Cast"(%7660) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %7662 = torch.operator "onnx.Mul"(%7661, %329) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %7663 = torch.operator "onnx.Concat"(%7649, %7611) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %7664 = torch.operator "onnx.Concat"(%7662, %7624) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %7665 = torch.operator "onnx.Concat"(%7636, %7598) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7666 = torch.operator "onnx.Shape"(%7663) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %7667 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %7668 = torch.operator "onnx.Gather"(%7666, %7667) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7669 = torch.operator "onnx.Shape"(%7663) : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %7670 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %7671 = torch.operator "onnx.Gather"(%7669, %7670) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7672 = torch.operator "onnx.Shape"(%7663) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %7673 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %7674 = torch.operator "onnx.Gather"(%7672, %7673) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7675 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12217_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7676 = torch.operator "onnx.Unsqueeze"(%7668, %7675) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7677 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12219_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7678 = torch.operator "onnx.Unsqueeze"(%7671, %7677) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7679 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12221_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7680 = torch.operator "onnx.Unsqueeze"(%7674, %7679) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7681 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7682 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.15_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7683 = torch.operator "onnx.Concat"(%7676, %7678, %7680, %7681, %7682) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %7684 = torch.operator "onnx.Reshape"(%7663, %7683) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %7685 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %7686:2 = torch.operator "onnx.Split"(%7684, %7685) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %7687 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7688 = torch.operator "onnx.Squeeze"(%7686#0, %7687) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7689 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7690 = torch.operator "onnx.Squeeze"(%7686#1, %7689) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7691 = torch.operator "onnx.Neg"(%7690) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7692 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7693 = torch.operator "onnx.Unsqueeze"(%7691, %7692) : 
(!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %7694 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7695 = torch.operator "onnx.Unsqueeze"(%7688, %7694) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %7696 = torch.operator "onnx.Concat"(%7693, %7695) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %7697 = torch.operator "onnx.Shape"(%7696) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %7698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7699 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7700 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7701 = torch.operator "onnx.Slice"(%7697, %7699, %7700, %7698) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %7702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7703 = torch.operator "onnx.Concat"(%7701, %7702) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7704 = torch.operator "onnx.Reshape"(%7696, %7703) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7705 = 
torch.operator "onnx.Cast"(%7663) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
// %7706..%7710: f32 combination %7663 * %1455 + %7704 * %1456, cast back to bf16.
//   NOTE(review): presumably the rotary-embedding cos/sin blend - %1455 / %1456 are defined outside this
//   view, so confirm against their producers.
// %7711..%7729: three Shape/Gather pairs on %7664 are unsqueezed and concatenated with two rank-1 constants
//   into the 5-element shape used to reshape %7664 to [..., 2].
// The rank-0 Gather index constants are typed tensor<si64> to match their !torch.vtensor<[],si64> result;
// a bare `tensor` has no element type and is not a valid builtin tensor type.
%7706 = torch.operator "onnx.Mul"(%7705, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7707 = torch.operator "onnx.Cast"(%7704) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7708 = torch.operator "onnx.Mul"(%7707, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7709 = torch.operator "onnx.Add"(%7706, %7708) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7710 = torch.operator "onnx.Cast"(%7709) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %7711 = torch.operator "onnx.Shape"(%7664) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %7712 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %7713 = torch.operator "onnx.Gather"(%7711, %7712) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7714 = torch.operator "onnx.Shape"(%7664) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %7715 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %7716 = torch.operator "onnx.Gather"(%7714, %7715) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7717 = torch.operator "onnx.Shape"(%7664) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %7718 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.15_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %7719 = torch.operator "onnx.Gather"(%7717, %7718) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7720 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12262_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7721 = torch.operator "onnx.Unsqueeze"(%7713, %7720) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7722 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12264_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7723 = torch.operator "onnx.Unsqueeze"(%7716, %7722) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7724 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12266_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7725 = torch.operator "onnx.Unsqueeze"(%7719, %7724) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7726 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7727 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7728 = torch.operator "onnx.Concat"(%7721, %7723, %7725, %7726, %7727) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %7729 = torch.operator "onnx.Reshape"(%7664, %7728) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %7730 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %7731:2 = torch.operator "onnx.Split"(%7729, %7730) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %7732 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7733 = torch.operator "onnx.Squeeze"(%7731#0, %7732) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7734 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7735 = torch.operator "onnx.Squeeze"(%7731#1, %7734) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7736 = torch.operator "onnx.Neg"(%7735) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %7737 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7738 = torch.operator "onnx.Unsqueeze"(%7736, %7737) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %7739 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7740 = torch.operator "onnx.Unsqueeze"(%7733, %7739) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %7741 = torch.operator "onnx.Concat"(%7738, %7740) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %7742 = 
torch.operator "onnx.Shape"(%7741) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %7743 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7744 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7745 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7746 = torch.operator "onnx.Slice"(%7742, %7744, %7745, %7743) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %7747 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7748 = torch.operator "onnx.Concat"(%7746, %7747) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7749 = torch.operator "onnx.Reshape"(%7741, %7748) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %7750 = torch.operator "onnx.Cast"(%7664) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %7751 = torch.operator "onnx.Mul"(%7750, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7752 = torch.operator "onnx.Cast"(%7749) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %7753 = torch.operator "onnx.Mul"(%7752, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7754 = torch.operator 
"onnx.Add"(%7751, %7753) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %7755 = torch.operator "onnx.Cast"(%7754) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %7756 = torch.operator "onnx.Shape"(%7710) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %7757 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7758 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7759 = torch.operator "onnx.Slice"(%7756, %7757, %7758) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7760 = torch.operator "onnx.Cast"(%7759) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %7761 = torch.operator "onnx.Sqrt"(%7760) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %7762 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %7763 = torch.operator "onnx.Cast"(%7761) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %7764 = torch.operator "onnx.Div"(%7762, %7763) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %7765 = torch.operator "onnx.Cast"(%7764) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %7766 = torch.operator "onnx.Transpose"(%7755) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %7767 = torch.operator "onnx.Sqrt"(%7765) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> 
// %7768 / %7770: %7710 and the transposed tensor %7766 are each multiplied by sqrt(%7765).
// %7771..%7773: MatMul of the two scaled operands, Softmax over the last axis, MatMul with %7665.
// %7774..%7783: transpose with perm [0, 2, 1, 3], then reshape to rank 3 with a shape concatenated from
//   %7537, a rank-1 constant and %7550 * %7775.
// %7784 casts bf16 to bf16 (a no-op emitted by the exporter); %7785..%7792 prepare Slice bounds from
//   dimension data of %7534.
// The rank-0 constants %7775 and %7786 are typed tensor<si64> to match their !torch.vtensor<[],si64>
// result; a bare `tensor` has no element type and is not a valid builtin tensor type.
%7768 = torch.operator "onnx.Mul"(%7710, %7767) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %7769 = torch.operator "onnx.Sqrt"(%7765) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %7770 = torch.operator "onnx.Mul"(%7766, %7769) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %7771 = torch.operator "onnx.MatMul"(%7768, %7770) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %7772 = torch.operator "onnx.Softmax"(%7771) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %7773 = torch.operator "onnx.MatMul"(%7772, %7665) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %7774 = torch.operator "onnx.Transpose"(%7773) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %7775 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_46_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %7776 = torch.operator "onnx.Mul"(%7550, %7775) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7777 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12319_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7778 = torch.operator "onnx.Unsqueeze"(%7537, %7777) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7779 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7780 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12322_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %7781 = torch.operator "onnx.Unsqueeze"(%7776, %7780) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7782 = torch.operator "onnx.Concat"(%7778, %7779, %7781) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %7783 = torch.operator "onnx.Reshape"(%7774, %7782) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %7784 = torch.operator "onnx.Cast"(%7783) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %7785 = torch.operator "onnx.Shape"(%7534) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %7786 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_48_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %7787 = torch.operator "onnx.Gather"(%7785, %7786) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7788 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7789 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7790 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7791 = torch.operator "onnx.Unsqueeze"(%7787, %7790) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7792 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7793 = torch.operator 
"onnx.Slice"(%7784, %7789, %7791, %7788, %7792) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %7794 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7795 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7796 = torch.operator "onnx.Unsqueeze"(%7787, %7795) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7797 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7798 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7799 = torch.operator "onnx.Slice"(%7784, %7796, %7797, %7794, %7798) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %7800 = torch.operator "onnx.MatMul"(%7799, %927) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7801 = torch.operator "onnx.Add"(%326, %7800) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7802 = torch.operator "onnx.MatMul"(%7793, %928) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7803 = torch.operator "onnx.Add"(%327, %7802) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7804 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.15_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7805 = torch.operator "onnx.Unsqueeze"(%7476, %7804) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7806 = torch.operator "onnx.Mul"(%7805, %7801) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7807 = torch.operator "onnx.Add"(%7422, %7806) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %_2Ftransformer_blocks.152Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.15/norm2/Constant_attr__value" : tensor<3072xbf16> %7808 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.152Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.152Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.15/norm2/Constant_1_attr__value" : tensor<3072xbf16> %7809 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.152Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7810 = torch.operator "onnx.LayerNormalization"(%7807, %7808, %7809) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7811 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7812 = torch.operator "onnx.Unsqueeze"(%7482, %7811) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7813 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7814 = torch.operator "onnx.Add"(%7812, %7813) : (!torch.vtensor<[?,1,?],bf16>, 
!torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %7815 = torch.operator "onnx.Mul"(%7810, %7814) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7816 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7817 = torch.operator "onnx.Unsqueeze"(%7479, %7816) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7818 = torch.operator "onnx.Add"(%7815, %7817) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7819 = torch.operator "onnx.MatMul"(%7818, %929) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7820 = torch.operator "onnx.Add"(%330, %7819) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7821 = torch.operator "onnx.Mul"(%7820, %7820) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7822 = torch.operator "onnx.Mul"(%7820, %7821) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7823 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_ff_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7824 = torch.operator "onnx.Mul"(%7823, %7822) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7825 = torch.operator "onnx.Add"(%7820, %7824) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7826 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_ff_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> 
%7827 = torch.operator "onnx.Mul"(%7826, %7825) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7828 = torch.operator "onnx.Tanh"(%7827) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7829 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_ff_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7830 = torch.operator "onnx.Add"(%7829, %7828) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7831 = torch.operator "onnx.Mul"(%7820, %7830) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7832 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_ff_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7833 = torch.operator "onnx.Mul"(%7832, %7831) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16> %7834 = torch.operator "onnx.MatMul"(%7833, %930) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7835 = torch.operator "onnx.Add"(%331, %7834) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7836 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7837 = torch.operator "onnx.Unsqueeze"(%7485, %7836) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7838 = torch.operator "onnx.Mul"(%7837, %7835) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7839 = torch.operator "onnx.Add"(%7807, %7838) : (!torch.vtensor<[?,4096,3072],bf16>, 
!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7840 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7841 = torch.operator "onnx.Unsqueeze"(%7514, %7840) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7842 = torch.operator "onnx.Mul"(%7841, %7803) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %7843 = torch.operator "onnx.Add"(%7458, %7842) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.152Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.15/norm2_context/Constant_attr__value" : tensor<3072xbf16> %7844 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.152Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.152Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.15/norm2_context/Constant_1_attr__value" : tensor<3072xbf16> %7845 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.152Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7846 = torch.operator "onnx.LayerNormalization"(%7843, %7844, %7845) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7848 = torch.operator "onnx.Unsqueeze"(%7520, %7847) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7849 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.15_Constant_7_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7850 = torch.operator "onnx.Add"(%7848, %7849) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %7851 = torch.operator "onnx.Mul"(%7846, %7850) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7852 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7853 = torch.operator "onnx.Unsqueeze"(%7517, %7852) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7854 = torch.operator "onnx.Add"(%7851, %7853) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7855 = torch.operator "onnx.MatMul"(%7854, %931) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7856 = torch.operator "onnx.Add"(%332, %7855) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7857 = torch.operator "onnx.Mul"(%7856, %7856) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7858 = torch.operator "onnx.Mul"(%7856, %7857) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7859 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_ff_context_net.0_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7860 = torch.operator "onnx.Mul"(%7859, %7858) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7861 = torch.operator "onnx.Add"(%7856, %7860) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> 
%7862 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_ff_context_net.0_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7863 = torch.operator "onnx.Mul"(%7862, %7861) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7864 = torch.operator "onnx.Tanh"(%7863) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7865 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_ff_context_net.0_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7866 = torch.operator "onnx.Add"(%7865, %7864) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7867 = torch.operator "onnx.Mul"(%7856, %7866) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_ff_context_net.0_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7869 = torch.operator "onnx.Mul"(%7868, %7867) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %7870 = torch.operator "onnx.MatMul"(%7869, %932) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7871 = torch.operator "onnx.Add"(%333, %7870) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7872 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.15_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7873 = torch.operator "onnx.Unsqueeze"(%7523, %7872) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7874 = torch.operator "onnx.Mul"(%7873, %7871) : (!torch.vtensor<[?,1,?],bf16>, 
!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7875 = torch.operator "onnx.Add"(%7843, %7874) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7876 = torch.operator "onnx.Gemm"(%1285, %334, %335) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %7877 = torch.operator "onnx.Shape"(%7876) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %7878 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7879 = torch.operator "onnx.Gather"(%7877, %7878) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7880 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7881 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7882 = torch.operator "onnx.Add"(%7879, %7881) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7883 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7884 = torch.operator "onnx.Div"(%7882, %7883) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7885 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7886 = torch.operator "onnx.Mul"(%7884, 
%7885) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7887 = torch.operator "onnx.Slice"(%7876, %7880, %7886, %7878) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7888 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7889 = torch.operator "onnx.Mul"(%7884, %7888) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7890 = torch.operator "onnx.Slice"(%7876, %7886, %7889, %7878) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7891 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7892 = torch.operator "onnx.Mul"(%7884, %7891) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7893 = torch.operator "onnx.Slice"(%7876, %7889, %7892, %7878) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7894 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7895 = torch.operator "onnx.Mul"(%7884, %7894) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7896 = torch.operator "onnx.Slice"(%7876, %7892, %7895, %7878) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7897 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_8_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7898 = torch.operator "onnx.Mul"(%7884, %7897) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7899 = torch.operator "onnx.Slice"(%7876, %7895, %7898, %7878) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7900 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7901 = torch.operator "onnx.Mul"(%7884, %7900) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7902 = torch.operator "onnx.Slice"(%7876, %7898, %7901, %7878) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.162Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.16/norm1/norm/Constant_attr__value" : tensor<3072xbf16> %7903 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.162Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.162Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.16/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16> %7904 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.162Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7905 = torch.operator "onnx.LayerNormalization"(%7839, %7903, %7904) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7906 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7907 = 
torch.operator "onnx.Unsqueeze"(%7890, %7906) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7908 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7909 = torch.operator "onnx.Add"(%7907, %7908) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %7910 = torch.operator "onnx.Mul"(%7905, %7909) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7911 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7912 = torch.operator "onnx.Unsqueeze"(%7887, %7911) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7913 = torch.operator "onnx.Add"(%7910, %7912) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7914 = torch.operator "onnx.Gemm"(%1285, %336, %337) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %7915 = torch.operator "onnx.Shape"(%7914) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %7916 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7917 = torch.operator "onnx.Gather"(%7915, %7916) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7918 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %7919 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7920 = torch.operator "onnx.Add"(%7917, %7919) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7921 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7922 = torch.operator "onnx.Div"(%7920, %7921) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7923 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7924 = torch.operator "onnx.Mul"(%7922, %7923) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7925 = torch.operator "onnx.Slice"(%7914, %7918, %7924, %7916) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7926 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7927 = torch.operator "onnx.Mul"(%7922, %7926) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7928 = torch.operator "onnx.Slice"(%7914, %7924, %7927, %7916) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7929 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7930 = torch.operator "onnx.Mul"(%7922, %7929) : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7931 = torch.operator "onnx.Slice"(%7914, %7927, %7930, %7916) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7932 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7933 = torch.operator "onnx.Mul"(%7922, %7932) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7934 = torch.operator "onnx.Slice"(%7914, %7930, %7933, %7916) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7935 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7936 = torch.operator "onnx.Mul"(%7922, %7935) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7937 = torch.operator "onnx.Slice"(%7914, %7933, %7936, %7916) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %7938 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7939 = torch.operator "onnx.Mul"(%7922, %7938) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7940 = torch.operator "onnx.Slice"(%7914, %7936, %7939, %7916) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.162Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.16/norm1_context/norm/Constant_attr__value" : 
tensor<3072xbf16> %7941 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.162Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.162Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.16/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %7942 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.162Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %7943 = torch.operator "onnx.LayerNormalization"(%7875, %7941, %7942) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7944 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7945 = torch.operator "onnx.Unsqueeze"(%7928, %7944) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7946 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %7947 = torch.operator "onnx.Add"(%7945, %7946) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %7948 = torch.operator "onnx.Mul"(%7943, %7947) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7950 = torch.operator "onnx.Unsqueeze"(%7925, %7949) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %7951 = torch.operator "onnx.Add"(%7948, %7950) : 
(!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %7952 = torch.operator "onnx.Shape"(%7951) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %7953 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %7954 = torch.operator "onnx.Gather"(%7952, %7953) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7955 = torch.operator "onnx.MatMul"(%7913, %933) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7956 = torch.operator "onnx.Add"(%340, %7955) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7957 = torch.operator "onnx.MatMul"(%7913, %934) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7958 = torch.operator "onnx.Add"(%341, %7957) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7959 = torch.operator "onnx.MatMul"(%7913, %935) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7960 = torch.operator "onnx.Add"(%342, %7959) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %7961 = torch.operator "onnx.Shape"(%7958) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64> %7962 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %7963 = torch.operator "onnx.Gather"(%7961, %7962) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7964 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.16_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %7965 = torch.operator "onnx.Div"(%7963, %7964) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7966 = torch.operator "onnx.Cast"(%7965) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7967 = torch.operator "onnx.Cast"(%7966) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %7968 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12510_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7969 = torch.operator "onnx.Unsqueeze"(%7954, %7968) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7970 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7971 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7972 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12514_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7973 = torch.operator "onnx.Unsqueeze"(%7967, %7972) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7974 = torch.operator "onnx.Concat"(%7969, %7970, %7971, %7973) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7975 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12517_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7976 = torch.operator "onnx.Unsqueeze"(%7954, %7975) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7977 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7979 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12521_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7980 = torch.operator "onnx.Unsqueeze"(%7967, %7979) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7981 = torch.operator "onnx.Concat"(%7976, %7977, %7978, %7980) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7982 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12524_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7983 = torch.operator "onnx.Unsqueeze"(%7954, %7982) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7984 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7986 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12528_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7987 = torch.operator "onnx.Unsqueeze"(%7967, %7986) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7988 = torch.operator "onnx.Concat"(%7983, %7984, %7985, %7987) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[4],si64> %7989 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12531_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7990 = torch.operator "onnx.Unsqueeze"(%7954, %7989) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7991 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7992 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7993 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12535_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7994 = torch.operator "onnx.Unsqueeze"(%7967, %7993) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7995 = torch.operator "onnx.Concat"(%7990, %7991, %7992, %7994) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %7996 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12538_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7997 = torch.operator "onnx.Unsqueeze"(%7954, %7996) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %7998 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %7999 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8000 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12542_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %8001 = torch.operator "onnx.Unsqueeze"(%7967, %8000) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8002 = torch.operator "onnx.Concat"(%7997, %7998, %7999, %8001) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8003 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12545_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8004 = torch.operator "onnx.Unsqueeze"(%7954, %8003) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8005 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8006 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8007 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12549_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8008 = torch.operator "onnx.Unsqueeze"(%7967, %8007) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8009 = torch.operator "onnx.Concat"(%8004, %8005, %8006, %8008) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8010 = torch.operator "onnx.Reshape"(%7956, %7974) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8011 = torch.operator "onnx.Transpose"(%8010) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8012 = torch.operator "onnx.Reshape"(%7958, %7981) 
{torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8013 = torch.operator "onnx.Transpose"(%8012) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8014 = torch.operator "onnx.Reshape"(%7960, %7988) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8015 = torch.operator "onnx.Transpose"(%8014) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8016 = torch.operator "onnx.Cast"(%8011) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8017 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8018 = torch.operator "onnx.Pow"(%8016, %8017) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %8019 = torch.operator "onnx.ReduceMean"(%8018) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8020 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8021 = torch.operator "onnx.Add"(%8019, %8020) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8022 = torch.operator "onnx.Sqrt"(%8021) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8023 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8024 = torch.operator "onnx.Div"(%8023, %8022) : (!torch.vtensor<[],f32>, 
!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8025 = torch.operator "onnx.Cast"(%8011) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8026 = torch.operator "onnx.Mul"(%8025, %8024) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %8027 = torch.operator "onnx.Cast"(%8026) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %8028 = torch.operator "onnx.Mul"(%8027, %338) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %8029 = torch.operator "onnx.Cast"(%8013) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8030 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8031 = torch.operator "onnx.Pow"(%8029, %8030) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %8032 = torch.operator "onnx.ReduceMean"(%8031) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8033 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8034 = torch.operator "onnx.Add"(%8032, %8033) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8035 = torch.operator "onnx.Sqrt"(%8034) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8036 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8037 = torch.operator "onnx.Div"(%8036, %8035) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> 
!torch.vtensor<[?,?,?,1],f32> %8038 = torch.operator "onnx.Cast"(%8013) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8039 = torch.operator "onnx.Mul"(%8038, %8037) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %8040 = torch.operator "onnx.Cast"(%8039) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %8041 = torch.operator "onnx.Mul"(%8040, %339) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %8042 = torch.operator "onnx.MatMul"(%7951, %936) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8043 = torch.operator "onnx.Add"(%345, %8042) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8044 = torch.operator "onnx.MatMul"(%7951, %937) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8045 = torch.operator "onnx.Add"(%343, %8044) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8046 = torch.operator "onnx.MatMul"(%7951, %938) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8047 = torch.operator "onnx.Add"(%344, %8046) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8048 = torch.operator "onnx.Reshape"(%8043, %7995) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8049 = torch.operator "onnx.Transpose"(%8048) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8050 = torch.operator "onnx.Reshape"(%8045, %8002) {torch.onnx.allowzero = 0 : si64} : 
(!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8051 = torch.operator "onnx.Transpose"(%8050) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8052 = torch.operator "onnx.Reshape"(%8047, %8009) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8053 = torch.operator "onnx.Transpose"(%8052) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8054 = torch.operator "onnx.Cast"(%8049) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8055 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_added_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8056 = torch.operator "onnx.Pow"(%8054, %8055) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %8057 = torch.operator "onnx.ReduceMean"(%8056) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8058 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_added_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8059 = torch.operator "onnx.Add"(%8057, %8058) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8060 = torch.operator "onnx.Sqrt"(%8059) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8061 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_added_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8062 = torch.operator "onnx.Div"(%8061, %8060) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> 
!torch.vtensor<[?,?,?,1],f32> %8063 = torch.operator "onnx.Cast"(%8049) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8064 = torch.operator "onnx.Mul"(%8063, %8062) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %8065 = torch.operator "onnx.Cast"(%8064) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %8066 = torch.operator "onnx.Mul"(%8065, %348) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %8067 = torch.operator "onnx.Cast"(%8051) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8068 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_added_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8069 = torch.operator "onnx.Pow"(%8067, %8068) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %8070 = torch.operator "onnx.ReduceMean"(%8069) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8071 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_added_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8072 = torch.operator "onnx.Add"(%8070, %8071) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8073 = torch.operator "onnx.Sqrt"(%8072) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8074 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_norm_added_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %8075 = torch.operator "onnx.Div"(%8074, %8073) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %8076 = 
torch.operator "onnx.Cast"(%8051) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8077 = torch.operator "onnx.Mul"(%8076, %8075) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %8078 = torch.operator "onnx.Cast"(%8077) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %8079 = torch.operator "onnx.Mul"(%8078, %349) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %8080 = torch.operator "onnx.Concat"(%8066, %8028) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %8081 = torch.operator "onnx.Concat"(%8079, %8041) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %8082 = torch.operator "onnx.Concat"(%8053, %8015) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8083 = torch.operator "onnx.Shape"(%8080) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %8084 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_15_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8085 = torch.operator "onnx.Gather"(%8083, %8084) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8086 = torch.operator "onnx.Shape"(%8080) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %8087 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_16_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8088 = torch.operator "onnx.Gather"(%8086, %8087) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> 
%8089 = torch.operator "onnx.Shape"(%8080) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %8090 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_17_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8091 = torch.operator "onnx.Gather"(%8089, %8090) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8092 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12634_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8093 = torch.operator "onnx.Unsqueeze"(%8085, %8092) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8094 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12636_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8095 = torch.operator "onnx.Unsqueeze"(%8088, %8094) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8096 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12638_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8097 = torch.operator "onnx.Unsqueeze"(%8091, %8096) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8098 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8099 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8100 = torch.operator "onnx.Concat"(%8093, %8095, %8097, %8098, %8099) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %8101 = torch.operator "onnx.Reshape"(%8080, %8100) 
{torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8102 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %8103:2 = torch.operator "onnx.Split"(%8101, %8102) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %8104 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8105 = torch.operator "onnx.Squeeze"(%8103#0, %8104) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8106 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8107 = torch.operator "onnx.Squeeze"(%8103#1, %8106) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8108 = torch.operator "onnx.Neg"(%8107) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8109 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8110 = torch.operator "onnx.Unsqueeze"(%8108, %8109) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8111 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8112 = torch.operator "onnx.Unsqueeze"(%8105, %8111) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8113 = torch.operator 
"onnx.Concat"(%8110, %8112) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8114 = torch.operator "onnx.Shape"(%8113) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %8115 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8116 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8117 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8118 = torch.operator "onnx.Slice"(%8114, %8116, %8117, %8115) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %8119 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8120 = torch.operator "onnx.Concat"(%8118, %8119) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8121 = torch.operator "onnx.Reshape"(%8113, %8120) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8122 = torch.operator "onnx.Cast"(%8080) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %8123 = torch.operator "onnx.Mul"(%8122, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8124 = torch.operator "onnx.Cast"(%8121) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8125 = 
torch.operator "onnx.Mul"(%8124, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8126 = torch.operator "onnx.Add"(%8123, %8125) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8127 = torch.operator "onnx.Cast"(%8126) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %8128 = torch.operator "onnx.Shape"(%8081) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %8129 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_29_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8130 = torch.operator "onnx.Gather"(%8128, %8129) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8131 = torch.operator "onnx.Shape"(%8081) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %8132 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_30_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8133 = torch.operator "onnx.Gather"(%8131, %8132) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8134 = torch.operator "onnx.Shape"(%8081) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %8135 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_31_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8136 = torch.operator "onnx.Gather"(%8134, %8135) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8137 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12679_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8138 = torch.operator "onnx.Unsqueeze"(%8130, %8137) : 
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8139 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12681_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8140 = torch.operator "onnx.Unsqueeze"(%8133, %8139) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8141 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12683_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8142 = torch.operator "onnx.Unsqueeze"(%8136, %8141) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8143 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8145 = torch.operator "onnx.Concat"(%8138, %8140, %8142, %8143, %8144) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %8146 = torch.operator "onnx.Reshape"(%8081, %8145) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8147 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %8148:2 = torch.operator "onnx.Split"(%8146, %8147) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %8149 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.16_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8150 = torch.operator "onnx.Squeeze"(%8148#0, %8149) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8151 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8152 = torch.operator "onnx.Squeeze"(%8148#1, %8151) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8153 = torch.operator "onnx.Neg"(%8152) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8154 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8155 = torch.operator "onnx.Unsqueeze"(%8153, %8154) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8156 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8157 = torch.operator "onnx.Unsqueeze"(%8150, %8156) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8158 = torch.operator "onnx.Concat"(%8155, %8157) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8159 = torch.operator "onnx.Shape"(%8158) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %8160 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8161 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_40_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8162 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8163 = torch.operator "onnx.Slice"(%8159, %8161, %8162, %8160) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %8164 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8165 = torch.operator "onnx.Concat"(%8163, %8164) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8166 = torch.operator "onnx.Reshape"(%8158, %8165) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8167 = torch.operator "onnx.Cast"(%8081) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %8168 = torch.operator "onnx.Mul"(%8167, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8169 = torch.operator "onnx.Cast"(%8166) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8170 = torch.operator "onnx.Mul"(%8169, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8171 = torch.operator "onnx.Add"(%8168, %8170) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8172 = torch.operator "onnx.Cast"(%8171) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %8173 = torch.operator "onnx.Shape"(%8127) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %8174 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8175 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8176 = torch.operator "onnx.Slice"(%8173, %8174, %8175) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8177 = torch.operator "onnx.Cast"(%8176) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %8178 = torch.operator "onnx.Sqrt"(%8177) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %8179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %8180 = torch.operator "onnx.Cast"(%8178) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %8181 = torch.operator "onnx.Div"(%8179, %8180) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %8182 = torch.operator "onnx.Cast"(%8181) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %8183 = torch.operator "onnx.Transpose"(%8172) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %8184 = torch.operator "onnx.Sqrt"(%8182) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %8185 = torch.operator "onnx.Mul"(%8127, %8184) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %8186 = torch.operator "onnx.Sqrt"(%8182) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %8187 = torch.operator "onnx.Mul"(%8183, %8186) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %8188 = 
torch.operator "onnx.MatMul"(%8185, %8187) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %8189 = torch.operator "onnx.Softmax"(%8188) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %8190 = torch.operator "onnx.MatMul"(%8189, %8082) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %8191 = torch.operator "onnx.Transpose"(%8190) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %8192 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_46_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8193 = torch.operator "onnx.Mul"(%7967, %8192) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8194 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12736_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8195 = torch.operator "onnx.Unsqueeze"(%7954, %8194) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8196 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8197 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12739_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8198 = torch.operator "onnx.Unsqueeze"(%8193, %8197) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8199 = torch.operator "onnx.Concat"(%8195, %8196, %8198) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %8200 = torch.operator "onnx.Reshape"(%8191, %8199) {torch.onnx.allowzero = 0 
: si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %8201 = torch.operator "onnx.Cast"(%8200) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %8202 = torch.operator "onnx.Shape"(%7951) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %8203 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8204 = torch.operator "onnx.Gather"(%8202, %8203) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8205 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8206 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8207 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8208 = torch.operator "onnx.Unsqueeze"(%8204, %8207) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8209 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8210 = torch.operator "onnx.Slice"(%8201, %8206, %8208, %8205, %8209) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %8211 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8212 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8213 = torch.operator "onnx.Unsqueeze"(%8204, %8212) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8214 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8215 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8216 = torch.operator "onnx.Slice"(%8201, %8213, %8214, %8211, %8215) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %8217 = torch.operator "onnx.MatMul"(%8216, %939) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %8218 = torch.operator "onnx.Add"(%346, %8217) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %8219 = torch.operator "onnx.MatMul"(%8210, %940) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %8220 = torch.operator "onnx.Add"(%347, %8219) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %8221 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8222 = torch.operator "onnx.Unsqueeze"(%7893, %8221) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %8223 = torch.operator "onnx.Mul"(%8222, %8218) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %8224 = torch.operator "onnx.Add"(%7839, 
%8223) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// LayerNormalization of %8224 over the last axis; its two [3072] operands are loaded from the
// "/transformer_blocks.16/norm2/..." globals.
%_2Ftransformer_blocks.162Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.16/norm2/Constant_attr__value" : tensor<3072xbf16>
%8225 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.162Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.162Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.16/norm2/Constant_1_attr__value" : tensor<3072xbf16>
%8226 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.162Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%8227 = torch.operator "onnx.LayerNormalization"(%8224, %8225, %8226) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Modulation: %8227 * (unsqueeze(%7899) + Constant_2), followed by + unsqueeze(%7896).
// Constant_2 is a rank-0 bf16 scalar, so its payload type is tensor<bf16>.
%8228 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8229 = torch.operator "onnx.Unsqueeze"(%7899, %8228) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8230 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8231 = torch.operator "onnx.Add"(%8229, %8230) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%8232 = torch.operator "onnx.Mul"(%8227, %8231) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8233 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8234 = torch.operator "onnx.Unsqueeze"(%7896, %8233) :
(!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8235 = torch.operator "onnx.Add"(%8232, %8234) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Projection 3072 -> 12288 with bias %350.
%8236 = torch.operator "onnx.MatMul"(%8235, %941) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8237 = torch.operator "onnx.Add"(%350, %8236) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Element-wise chain on x = %8237: c3 * x * (c2 + tanh(c1 * (x + c0 * x*x*x))).
// NOTE(review): this has the shape of the tanh-approximate GELU; the four rank-0 bf16
// constants live in dense resources not visible here -- confirm their values.
%8238 = torch.operator "onnx.Mul"(%8237, %8237) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8239 = torch.operator "onnx.Mul"(%8237, %8238) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8240 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8241 = torch.operator "onnx.Mul"(%8240, %8239) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8242 = torch.operator "onnx.Add"(%8237, %8241) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8243 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8244 = torch.operator "onnx.Mul"(%8243, %8242) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8245 = torch.operator "onnx.Tanh"(%8244) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8246 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () ->
!torch.vtensor<[],bf16>
// Tail of the tanh chain on %8237: Constant_3 * (%8237 * (Constant_2 + tanh(...))).
%8247 = torch.operator "onnx.Add"(%8246, %8245) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8248 = torch.operator "onnx.Mul"(%8237, %8247) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8249 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8250 = torch.operator "onnx.Mul"(%8249, %8248) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Projection 12288 -> 3072 with bias %351.
%8251 = torch.operator "onnx.MatMul"(%8250, %942) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8252 = torch.operator "onnx.Add"(%351, %8251) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Residual: %8256 = %8224 + unsqueeze(%7902) * %8252.
%8253 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8254 = torch.operator "onnx.Unsqueeze"(%7902, %8253) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8255 = torch.operator "onnx.Mul"(%8254, %8252) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8256 = torch.operator "onnx.Add"(%8224, %8255) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8257 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8258 = torch.operator "onnx.Unsqueeze"(%7931, %8257) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8259 = torch.operator "onnx.Mul"(%8258, %8220) :
(!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
// Residual on the 512-long stream: %8260 = %7875 + %8259.
%8260 = torch.operator "onnx.Add"(%7875, %8259) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// LayerNormalization of %8260 over the last axis; its two [3072] operands are loaded from the
// "/transformer_blocks.16/norm2_context/..." globals.
%_2Ftransformer_blocks.162Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.16/norm2_context/Constant_attr__value" : tensor<3072xbf16>
%8261 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.162Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.162Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.16/norm2_context/Constant_1_attr__value" : tensor<3072xbf16>
%8262 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.162Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%8263 = torch.operator "onnx.LayerNormalization"(%8260, %8261, %8262) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Modulation: %8263 * (unsqueeze(%7937) + Constant_7). Constant_7 is a rank-0 bf16 scalar,
// so its payload type is tensor<bf16>.
%8264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8265 = torch.operator "onnx.Unsqueeze"(%7937, %8264) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8266 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8267 = torch.operator "onnx.Add"(%8265, %8266) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%8268 = torch.operator "onnx.Mul"(%8263, %8267) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8269 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.16_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8270 = torch.operator "onnx.Unsqueeze"(%7934, %8269) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8271 = torch.operator "onnx.Add"(%8268, %8270) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Projection 3072 -> 12288 with bias %352.
%8272 = torch.operator "onnx.MatMul"(%8271, %943) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8273 = torch.operator "onnx.Add"(%352, %8272) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// Element-wise chain on x = %8273: tanh(c1 * (x + c0 * x*x*x)); the remaining factors follow
// after the Tanh.
// NOTE(review): this has the shape of the tanh-approximate GELU; the rank-0 bf16 constants
// live in dense resources not visible here -- confirm their values.
%8274 = torch.operator "onnx.Mul"(%8273, %8273) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8275 = torch.operator "onnx.Mul"(%8273, %8274) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8276 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8277 = torch.operator "onnx.Mul"(%8276, %8275) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8278 = torch.operator "onnx.Add"(%8273, %8277) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8279 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8280 = torch.operator "onnx.Mul"(%8279, %8278) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8281 = torch.operator "onnx.Tanh"(%8280) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8282 =
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
// Tail of the tanh chain on %8273: Constant_3 * (%8273 * (Constant_2 + %8281)).
%8283 = torch.operator "onnx.Add"(%8282, %8281) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8284 = torch.operator "onnx.Mul"(%8273, %8283) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8285 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8286 = torch.operator "onnx.Mul"(%8285, %8284) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// Projection 12288 -> 3072 with bias %353.
%8287 = torch.operator "onnx.MatMul"(%8286, %944) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8288 = torch.operator "onnx.Add"(%353, %8287) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Residual: %8292 = %8260 + unsqueeze(%7940) * %8288.
%8289 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.16_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8290 = torch.operator "onnx.Unsqueeze"(%7940, %8289) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8291 = torch.operator "onnx.Mul"(%8290, %8288) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8292 = torch.operator "onnx.Add"(%8260, %8291) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Gemm of %1285 against %354 (transB = 1) plus %355, producing a [1,18432] row; the constant
// names that follow refer to transformer_blocks.17 norm1.
%8293 = torch.operator "onnx.Gemm"(%1285, %354, %355) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>,
!torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %8294 = torch.operator "onnx.Shape"(%8293) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %8295 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8296 = torch.operator "onnx.Gather"(%8294, %8295) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8297 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8298 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8299 = torch.operator "onnx.Add"(%8296, %8298) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8300 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8301 = torch.operator "onnx.Div"(%8299, %8300) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8302 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8303 = torch.operator "onnx.Mul"(%8301, %8302) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8304 = torch.operator "onnx.Slice"(%8293, %8297, %8303, %8295) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8305 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %8306 = torch.operator "onnx.Mul"(%8301, %8305) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8307 = torch.operator "onnx.Slice"(%8293, %8303, %8306, %8295) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8308 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8309 = torch.operator "onnx.Mul"(%8301, %8308) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8310 = torch.operator "onnx.Slice"(%8293, %8306, %8309, %8295) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8311 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8312 = torch.operator "onnx.Mul"(%8301, %8311) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8313 = torch.operator "onnx.Slice"(%8293, %8309, %8312, %8295) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8314 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8315 = torch.operator "onnx.Mul"(%8301, %8314) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8316 = torch.operator "onnx.Slice"(%8293, %8312, %8315, %8295) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8317 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.17_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// Last of the consecutive slices of the [1,18432] Gemm row %8293: from %8315 to %8301 * Constant_9.
%8318 = torch.operator "onnx.Mul"(%8301, %8317) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8319 = torch.operator "onnx.Slice"(%8293, %8315, %8318, %8295) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// LayerNormalization of %8256 over the last axis; its two [3072] operands are loaded from the
// "/transformer_blocks.17/norm1/norm/..." globals.
%_2Ftransformer_blocks.172Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.17/norm1/norm/Constant_attr__value" : tensor<3072xbf16>
%8320 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.172Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.172Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.17/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
%8321 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.172Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%8322 = torch.operator "onnx.LayerNormalization"(%8256, %8320, %8321) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Multiplier for the normalized tensor: unsqueeze(%8307) + Constant_11. Constant_11 is a
// rank-0 bf16 scalar, so its payload type is tensor<bf16>.
%8323 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8324 = torch.operator "onnx.Unsqueeze"(%8307, %8323) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8325 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8326 = torch.operator "onnx.Add"(%8324, %8325) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%8327 =
torch.operator "onnx.Mul"(%8322, %8326) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %8328 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8329 = torch.operator "onnx.Unsqueeze"(%8304, %8328) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %8330 = torch.operator "onnx.Add"(%8327, %8329) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %8331 = torch.operator "onnx.Gemm"(%1285, %356, %357) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16> %8332 = torch.operator "onnx.Shape"(%8331) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64> %8333 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8334 = torch.operator "onnx.Gather"(%8332, %8333) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8335 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8336 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8337 = torch.operator "onnx.Add"(%8334, %8336) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8338 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.17_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8339 = torch.operator "onnx.Div"(%8337, %8338) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8340 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8341 = torch.operator "onnx.Mul"(%8339, %8340) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8342 = torch.operator "onnx.Slice"(%8331, %8335, %8341, %8333) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8343 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8344 = torch.operator "onnx.Mul"(%8339, %8343) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8345 = torch.operator "onnx.Slice"(%8331, %8341, %8344, %8333) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8346 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8347 = torch.operator "onnx.Mul"(%8339, %8346) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8348 = torch.operator "onnx.Slice"(%8331, %8344, %8347, %8333) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8349 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : 
() -> !torch.vtensor<[1],si64> %8350 = torch.operator "onnx.Mul"(%8339, %8349) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8351 = torch.operator "onnx.Slice"(%8331, %8347, %8350, %8333) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8352 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8353 = torch.operator "onnx.Mul"(%8339, %8352) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8354 = torch.operator "onnx.Slice"(%8331, %8350, %8353, %8333) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %8355 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8356 = torch.operator "onnx.Mul"(%8339, %8355) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8357 = torch.operator "onnx.Slice"(%8331, %8353, %8356, %8333) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Ftransformer_blocks.172Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.17/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16> %8358 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.172Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.172Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.17/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16> %8359 = torch_c.from_builtin_tensor 
%_2Ftransformer_blocks.172Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// LayerNormalization of %8292 over the last axis, then modulation:
// %8360 * (unsqueeze(%8345) + Constant_11) + unsqueeze(%8342).
// Constant_11 is a rank-0 bf16 scalar, so its payload type is tensor<bf16>.
%8360 = torch.operator "onnx.LayerNormalization"(%8292, %8358, %8359) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8361 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8362 = torch.operator "onnx.Unsqueeze"(%8345, %8361) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8363 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8364 = torch.operator "onnx.Add"(%8362, %8363) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%8365 = torch.operator "onnx.Mul"(%8360, %8364) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8366 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8367 = torch.operator "onnx.Unsqueeze"(%8342, %8366) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8368 = torch.operator "onnx.Add"(%8365, %8367) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// %8371: one dimension of %8368's shape, selected by the rank-0 si64 index attn_Constant.
%8369 = torch.operator "onnx.Shape"(%8368) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%8370 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8371 = torch.operator
"onnx.Gather"(%8369, %8370) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three 3072 -> 3072 projections of %8330, each followed by a bias add (%360, %361, %362).
%8372 = torch.operator "onnx.MatMul"(%8330, %945) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8373 = torch.operator "onnx.Add"(%360, %8372) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8374 = torch.operator "onnx.MatMul"(%8330, %946) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8375 = torch.operator "onnx.Add"(%361, %8374) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8376 = torch.operator "onnx.MatMul"(%8330, %947) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8377 = torch.operator "onnx.Add"(%362, %8376) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// %8382: one dimension of %8375's shape (index attn_Constant_1) divided by attn_Constant_2;
// both are rank-0 si64 scalars. The Cast with to = 7 keeps the value si64.
%8378 = torch.operator "onnx.Shape"(%8375) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
%8379 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8380 = torch.operator "onnx.Gather"(%8378, %8379) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8381 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8382 = torch.operator "onnx.Div"(%8380, %8381) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8383 = torch.operator "onnx.Cast"(%8382) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8384 = torch.operator "onnx.Cast"(%8383)
{torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8385 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12927_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8386 = torch.operator "onnx.Unsqueeze"(%8371, %8385) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8387 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8388 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8389 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12931_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8390 = torch.operator "onnx.Unsqueeze"(%8384, %8389) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8391 = torch.operator "onnx.Concat"(%8386, %8387, %8388, %8390) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8392 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12934_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8393 = torch.operator "onnx.Unsqueeze"(%8371, %8392) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8394 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8395 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8396 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_12938_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8397 = torch.operator "onnx.Unsqueeze"(%8384, %8396) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8398 = torch.operator "onnx.Concat"(%8393, %8394, %8395, %8397) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8399 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12941_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8400 = torch.operator "onnx.Unsqueeze"(%8371, %8399) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8401 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8402 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8403 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12945_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8404 = torch.operator "onnx.Unsqueeze"(%8384, %8403) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8405 = torch.operator "onnx.Concat"(%8400, %8401, %8402, %8404) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8406 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12948_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8407 = torch.operator "onnx.Unsqueeze"(%8371, %8406) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8408 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.17_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8410 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12952_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8411 = torch.operator "onnx.Unsqueeze"(%8384, %8410) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8412 = torch.operator "onnx.Concat"(%8407, %8408, %8409, %8411) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8413 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12955_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8414 = torch.operator "onnx.Unsqueeze"(%8371, %8413) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8415 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8416 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8417 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12959_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8418 = torch.operator "onnx.Unsqueeze"(%8384, %8417) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8419 = torch.operator "onnx.Concat"(%8414, %8415, %8416, %8418) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> 
%8420 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12962_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8421 = torch.operator "onnx.Unsqueeze"(%8371, %8420) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8422 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8423 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8424 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_12966_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8425 = torch.operator "onnx.Unsqueeze"(%8384, %8424) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8426 = torch.operator "onnx.Concat"(%8421, %8422, %8423, %8425) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8427 = torch.operator "onnx.Reshape"(%8373, %8391) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8428 = torch.operator "onnx.Transpose"(%8427) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8429 = torch.operator "onnx.Reshape"(%8375, %8398) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8430 = torch.operator "onnx.Transpose"(%8429) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8431 = torch.operator "onnx.Reshape"(%8377, %8405) {torch.onnx.allowzero = 0 : si64} : 
(!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// Transpose the reshaped %8431 with perm [0, 2, 1, 3].
%8432 = torch.operator "onnx.Transpose"(%8431) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// ---- attn.norm_q chain (name taken from the dense_resource keys), input %8428 ----
// Rank-0 constants below are typed tensor<f32> so the attribute type agrees with
// the op's !torch.vtensor<[],f32> result; a bare `tensor` is not a valid MLIR type.
// Step 1: cast to f32 (ONNX dtype 1), raise to a scalar power, mean over the last axis (keepdims).
// NOTE(review): the exponent value lives in the resource blob; presumably 2.0 - confirm.
%8433 = torch.operator "onnx.Cast"(%8428) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8434 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8435 = torch.operator "onnx.Pow"(%8433, %8434) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8436 = torch.operator "onnx.ReduceMean"(%8435) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Step 2: c2 / sqrt(mean + c1).
// NOTE(review): c1 and c2 are presumably epsilon and 1.0 - confirm against the resource blob.
%8437 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8438 = torch.operator "onnx.Add"(%8436, %8437) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8439 = torch.operator "onnx.Sqrt"(%8438) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8440 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8441 = torch.operator "onnx.Div"(%8440, %8439) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Step 3: scale a second f32 cast of %8428, round to bf16 (ONNX dtype 16), then
// multiply by the [128] bf16 operand %358 (defined outside this span).
%8442 = torch.operator "onnx.Cast"(%8428) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8443 = torch.operator "onnx.Mul"(%8442, %8441) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8444 = torch.operator "onnx.Cast"(%8443) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%8445 = torch.operator "onnx.Mul"(%8444, %358) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// ---- attn.norm_k chain: same op sequence as above, input %8430 ----
%8446 = torch.operator "onnx.Cast"(%8430) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8447 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8448 = torch.operator "onnx.Pow"(%8446, %8447) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8449 = torch.operator "onnx.ReduceMean"(%8448) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8450 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8451 = torch.operator "onnx.Add"(%8449, %8450) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8452 = torch.operator "onnx.Sqrt"(%8451) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8453 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8454 = torch.operator "onnx.Div"(%8453, %8452) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8455 = torch.operator "onnx.Cast"(%8430) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8456 = torch.operator "onnx.Mul"(%8455, %8454) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8457 = torch.operator "onnx.Cast"(%8456) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> 
!torch.vtensor<[?,?,?,?],bf16> %8458 = torch.operator "onnx.Mul"(%8457, %359) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %8459 = torch.operator "onnx.MatMul"(%8368, %948) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8460 = torch.operator "onnx.Add"(%365, %8459) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8461 = torch.operator "onnx.MatMul"(%8368, %949) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8462 = torch.operator "onnx.Add"(%363, %8461) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8463 = torch.operator "onnx.MatMul"(%8368, %950) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8464 = torch.operator "onnx.Add"(%364, %8463) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8465 = torch.operator "onnx.Reshape"(%8460, %8412) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8466 = torch.operator "onnx.Transpose"(%8465) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8467 = torch.operator "onnx.Reshape"(%8462, %8419) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8468 = torch.operator "onnx.Transpose"(%8467) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8469 = torch.operator "onnx.Reshape"(%8464, %8426) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> 
!torch.vtensor<[?,?,?,?],bf16>
// Transpose the reshaped %8469 with perm [0, 2, 1, 3].
%8470 = torch.operator "onnx.Transpose"(%8469) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// ---- attn.norm_added_q chain (name taken from the dense_resource keys), input %8466 ----
// Rank-0 constants below are typed tensor<f32> so the attribute type agrees with
// the op's !torch.vtensor<[],f32> result; a bare `tensor` is not a valid MLIR type.
// Step 1: cast to f32 (ONNX dtype 1), raise to a scalar power, mean over the last axis (keepdims).
// NOTE(review): the exponent value lives in the resource blob; presumably 2.0 - confirm.
%8471 = torch.operator "onnx.Cast"(%8466) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8472 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8473 = torch.operator "onnx.Pow"(%8471, %8472) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8474 = torch.operator "onnx.ReduceMean"(%8473) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Step 2: c2 / sqrt(mean + c1).
// NOTE(review): c1 and c2 are presumably epsilon and 1.0 - confirm against the resource blob.
%8475 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8476 = torch.operator "onnx.Add"(%8474, %8475) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8477 = torch.operator "onnx.Sqrt"(%8476) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8478 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8479 = torch.operator "onnx.Div"(%8478, %8477) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Step 3: scale a second f32 cast of %8466, round to bf16 (ONNX dtype 16), then
// multiply by the [128] bf16 operand %368 (defined outside this span).
%8480 = torch.operator "onnx.Cast"(%8466) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8481 = torch.operator "onnx.Mul"(%8480, %8479) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8482 = torch.operator "onnx.Cast"(%8481) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%8483 = torch.operator "onnx.Mul"(%8482, %368) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// ---- attn.norm_added_k chain: same op sequence as above, input %8468 ----
%8484 = torch.operator "onnx.Cast"(%8468) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8485 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8486 = torch.operator "onnx.Pow"(%8484, %8485) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8487 = torch.operator "onnx.ReduceMean"(%8486) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8488 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8489 = torch.operator "onnx.Add"(%8487, %8488) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8490 = torch.operator "onnx.Sqrt"(%8489) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8491 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8492 = torch.operator "onnx.Div"(%8491, %8490) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8493 = torch.operator "onnx.Cast"(%8468) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8494 = torch.operator "onnx.Mul"(%8493, %8492) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8495 = torch.operator "onnx.Cast"(%8494) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%8496 = 
torch.operator "onnx.Mul"(%8495, %369) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenate along axis 2, with the norm_added_* / %8470 operand first in each pair:
//   %8497 = concat(%8483, %8445), %8498 = concat(%8496, %8458), %8499 = concat(%8470, %8432).
%8497 = torch.operator "onnx.Concat"(%8483, %8445) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%8498 = torch.operator "onnx.Concat"(%8496, %8458) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%8499 = torch.operator "onnx.Concat"(%8470, %8432) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Read three scalar extents out of shape(%8497). The gather indices are rank-0 si64
// constants, typed tensor<si64> to agree with their !torch.vtensor<[],si64> result
// (a bare `tensor` is not a valid MLIR type).
// NOTE(review): the index values live in the resource blob; which dims are picked is not visible here.
%8500 = torch.operator "onnx.Shape"(%8497) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8501 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8502 = torch.operator "onnx.Gather"(%8500, %8501) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8503 = torch.operator "onnx.Shape"(%8497) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8504 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8505 = torch.operator "onnx.Gather"(%8503, %8504) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8506 = torch.operator "onnx.Shape"(%8497) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8507 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8508 = torch.operator "onnx.Gather"(%8506, %8507) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %8509 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13051_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8510 = torch.operator "onnx.Unsqueeze"(%8502, %8509) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8511 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13053_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8512 = torch.operator "onnx.Unsqueeze"(%8505, %8511) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8513 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13055_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8514 = torch.operator "onnx.Unsqueeze"(%8508, %8513) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8515 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8516 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8517 = torch.operator "onnx.Concat"(%8510, %8512, %8514, %8515, %8516) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %8518 = torch.operator "onnx.Reshape"(%8497, %8517) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8519 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %8520:2 = torch.operator "onnx.Split"(%8518, %8519) {torch.onnx.axis = -1 : si64} : 
(!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %8521 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8522 = torch.operator "onnx.Squeeze"(%8520#0, %8521) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8523 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8524 = torch.operator "onnx.Squeeze"(%8520#1, %8523) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8525 = torch.operator "onnx.Neg"(%8524) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8526 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8527 = torch.operator "onnx.Unsqueeze"(%8525, %8526) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8528 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8529 = torch.operator "onnx.Unsqueeze"(%8522, %8528) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8530 = torch.operator "onnx.Concat"(%8527, %8529) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8531 = torch.operator "onnx.Shape"(%8530) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %8532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_25_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8533 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8534 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8535 = torch.operator "onnx.Slice"(%8531, %8533, %8534, %8532) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %8536 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8537 = torch.operator "onnx.Concat"(%8535, %8536) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8538 = torch.operator "onnx.Reshape"(%8530, %8537) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8539 = torch.operator "onnx.Cast"(%8497) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %8540 = torch.operator "onnx.Mul"(%8539, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8541 = torch.operator "onnx.Cast"(%8538) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8542 = torch.operator "onnx.Mul"(%8541, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8543 = torch.operator "onnx.Add"(%8540, %8542) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8544 = torch.operator "onnx.Cast"(%8543) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) 
-> !torch.vtensor<[?,?,4608,128],bf16>
// Read three scalar extents out of shape(%8498). The gather indices are rank-0 si64
// constants, typed tensor<si64> to agree with their !torch.vtensor<[],si64> result
// (a bare `tensor` is not a valid MLIR type).
// NOTE(review): the index values live in the resource blob; which dims are picked is not visible here.
%8545 = torch.operator "onnx.Shape"(%8498) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8546 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8547 = torch.operator "onnx.Gather"(%8545, %8546) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8548 = torch.operator "onnx.Shape"(%8498) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8549 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8550 = torch.operator "onnx.Gather"(%8548, %8549) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8551 = torch.operator "onnx.Shape"(%8498) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8552 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8553 = torch.operator "onnx.Gather"(%8551, %8552) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Lift the gathered scalars to rank-1 [1] tensors.
%8554 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13096_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8555 = torch.operator "onnx.Unsqueeze"(%8547, %8554) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8556 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13098_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8557 = torch.operator "onnx.Unsqueeze"(%8550, %8556) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8558 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<_Constant_13100_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8559 = torch.operator "onnx.Unsqueeze"(%8553, %8558) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8560 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8561 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8562 = torch.operator "onnx.Concat"(%8555, %8557, %8559, %8560, %8561) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %8563 = torch.operator "onnx.Reshape"(%8498, %8562) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8564 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %8565:2 = torch.operator "onnx.Split"(%8563, %8564) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %8566 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8567 = torch.operator "onnx.Squeeze"(%8565#0, %8566) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8568 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8569 = torch.operator 
"onnx.Squeeze"(%8565#1, %8568) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8570 = torch.operator "onnx.Neg"(%8569) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8571 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8572 = torch.operator "onnx.Unsqueeze"(%8570, %8571) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8573 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8574 = torch.operator "onnx.Unsqueeze"(%8567, %8573) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8575 = torch.operator "onnx.Concat"(%8572, %8574) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8576 = torch.operator "onnx.Shape"(%8575) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %8577 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8578 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8579 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8580 = torch.operator "onnx.Slice"(%8576, %8578, %8579, %8577) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %8581 = torch.operator "onnx.Constant"() {torch.onnx.value 
= dense_resource<__transformer_blocks.17_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8582 = torch.operator "onnx.Concat"(%8580, %8581) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8583 = torch.operator "onnx.Reshape"(%8575, %8582) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8584 = torch.operator "onnx.Cast"(%8498) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %8585 = torch.operator "onnx.Mul"(%8584, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8586 = torch.operator "onnx.Cast"(%8583) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8587 = torch.operator "onnx.Mul"(%8586, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8588 = torch.operator "onnx.Add"(%8585, %8587) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %8589 = torch.operator "onnx.Cast"(%8588) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %8590 = torch.operator "onnx.Shape"(%8544) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %8591 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8592 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8593 = torch.operator "onnx.Slice"(%8590, %8591, %8592) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %8594 = torch.operator "onnx.Cast"(%8593) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %8595 = torch.operator "onnx.Sqrt"(%8594) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %8596 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %8597 = torch.operator "onnx.Cast"(%8595) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %8598 = torch.operator "onnx.Div"(%8596, %8597) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %8599 = torch.operator "onnx.Cast"(%8598) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %8600 = torch.operator "onnx.Transpose"(%8589) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %8601 = torch.operator "onnx.Sqrt"(%8599) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %8602 = torch.operator "onnx.Mul"(%8544, %8601) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %8603 = torch.operator "onnx.Sqrt"(%8599) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %8604 = torch.operator "onnx.Mul"(%8600, %8603) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %8605 = torch.operator "onnx.MatMul"(%8602, %8604) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %8606 = torch.operator "onnx.Softmax"(%8605) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %8607 = torch.operator "onnx.MatMul"(%8606, %8499) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %8608 = 
torch.operator "onnx.Transpose"(%8607) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %8609 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_46_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %8610 = torch.operator "onnx.Mul"(%8384, %8609) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %8611 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13153_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8612 = torch.operator "onnx.Unsqueeze"(%8371, %8611) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8614 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13156_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8615 = torch.operator "onnx.Unsqueeze"(%8610, %8614) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8616 = torch.operator "onnx.Concat"(%8612, %8613, %8615) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %8617 = torch.operator "onnx.Reshape"(%8608, %8616) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %8618 = torch.operator "onnx.Cast"(%8617) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %8619 = torch.operator "onnx.Shape"(%8368) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %8620 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_48_attr__value> : 
tensor<si64>} : () -> !torch.vtensor<[],si64>
// ^ Tail of the %8620 constant begun on the preceding line. The scalar attribute is typed
//   `tensor<si64>` so it agrees with the `!torch.vtensor<[],si64>` result type.

// %8621 = one entry of %8619 (the shape of a [?,512,3072] tensor). Which entry depends on
// the value of %8620, which is not visible here (presumably index 1, i.e. 512 -- confirm).
%8621 = torch.operator "onnx.Gather"(%8619, %8620) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// --- first slice of %8618 ---
// onnx.Slice operands are (data, starts, ends, axes, steps):
//   %8627 = %8618[ Constant_50 : %8621 ] along axes Constant_49 with steps Constant_52.
%8622 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8623 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8624 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8625 = torch.operator "onnx.Unsqueeze"(%8621, %8624) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8626 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8627 = torch.operator "onnx.Slice"(%8618, %8623, %8625, %8622, %8626) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16>

// --- operands for a second slice of %8618 ---
// %8630 re-wraps the same scalar %8621 as a 1-element vector; %8628/%8631 are constants.
%8628 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8629 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8630 = torch.operator "onnx.Unsqueeze"(%8621, %8629) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8631 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The defining op of %8632 continues on the following line.
%8632 =
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ^ Body of %8632 (its `%8632 =` sits at the end of the preceding line).

// onnx.Slice operands are (data, starts, ends, axes, steps):
//   %8633 = %8618[ %8630 : %8631 ] along axes %8628 with steps %8632.
%8633 = torch.operator "onnx.Slice"(%8618, %8630, %8631, %8628, %8632) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16>

// --- two 3072x3072 projections, each followed by a [3072] bias add ---
// %8633 -> weight %951, bias %366 -> %8635
// %8627 -> weight %952, bias %367 -> %8637
%8634 = torch.operator "onnx.MatMul"(%8633, %951) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%8635 = torch.operator "onnx.Add"(%366, %8634) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%8636 = torch.operator "onnx.MatMul"(%8627, %952) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%8637 = torch.operator "onnx.Add"(%367, %8636) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>

// --- gated residual: %8641 = %8256 + %8310[:, None, :] * %8635 ---
// (the unsqueeze inserts a size-1 axis at position 1, per its result type [?,1,?])
%8638 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8639 = torch.operator "onnx.Unsqueeze"(%8310, %8638) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8640 = torch.operator "onnx.Mul"(%8639, %8635) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%8641 = torch.operator "onnx.Add"(%8256, %8640) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>

// --- norm2 parameters: load global tensors and convert them to torch value tensors ---
%_2Ftransformer_blocks.172Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.17/norm2/Constant_attr__value" : tensor<3072xbf16>
%8642 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.172Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// The symbol operand of this load continues on the following line.
%_2Ftransformer_blocks.172Fnorm22FConstant_1_attr__value = util.global.load
@"/transformer_blocks.17/norm2/Constant_1_attr__value" : tensor<3072xbf16>
// ^ Symbol and type closing the util.global.load begun on the preceding line.
%8643 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.172Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// --- transformer_blocks.17 norm2: LayerNorm over the last axis, then modulation ---
// %8652 = LayerNorm(%8641) * (%8316[:, None, :] + Constant_2) + %8313[:, None, :]
// Constant_2 is a scalar bf16 whose value is in a dense_resource blob (presumably 1.0,
// the usual "1 + scale" form -- confirm). Its attribute is typed `tensor<bf16>` so it
// agrees with the `!torch.vtensor<[],bf16>` result type.
%8644 = torch.operator "onnx.LayerNormalization"(%8641, %8642, %8643) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8645 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8646 = torch.operator "onnx.Unsqueeze"(%8316, %8645) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8647 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8648 = torch.operator "onnx.Add"(%8646, %8647) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%8649 = torch.operator "onnx.Mul"(%8644, %8648) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8650 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8651 = torch.operator "onnx.Unsqueeze"(%8313, %8650) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8652 = torch.operator "onnx.Add"(%8649, %8651) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>

// --- feed-forward input projection: 3072 -> 12288 (weight %953, bias %370) ---
%8653 = torch.operator "onnx.MatMul"(%8652, %953) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// The remaining operand/result types of %8654 continue on the following line.
%8654 = torch.operator "onnx.Add"(%370, %8653) : (!torch.vtensor<[12288],bf16>,
!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// ^ Remaining types of the %8654 bias add begun on the preceding line.

// --- transformer_blocks.17 ff_net.0: elementwise activation on x = %8654 ---
//   %8665 = x * (c2 + tanh(c1 * (x + c0 * x^3))), later scaled by c3 (%8666).
// c0..c3 are scalar bf16 constants stored in dense_resource blobs not visible here. The
// expression has the shape of tanh-approximated GELU (presumably c0 = 0.044715,
// c1 = sqrt(2/pi), c2 = 1, c3 = 0.5 -- confirm against the resources).
// Each scalar attribute is typed `tensor<bf16>` so it agrees with its
// `!torch.vtensor<[],bf16>` result type.
%8655 = torch.operator "onnx.Mul"(%8654, %8654) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8656 = torch.operator "onnx.Mul"(%8654, %8655) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8657 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8658 = torch.operator "onnx.Mul"(%8657, %8656) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8659 = torch.operator "onnx.Add"(%8654, %8658) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8660 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8661 = torch.operator "onnx.Mul"(%8660, %8659) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8662 = torch.operator "onnx.Tanh"(%8661) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8663 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8664 = torch.operator "onnx.Add"(%8663, %8662) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8665 = torch.operator "onnx.Mul"(%8654, %8664) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// The result type of %8666 (`!torch.vtensor<[],bf16>`) is on the following line.
%8666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () ->
!torch.vtensor<[],bf16>
// ^ Result type closing the %8666 constant begun on the preceding line.

// --- feed-forward output: scale by %8666, project 12288 -> 3072 (weight %954, bias %371) ---
%8667 = torch.operator "onnx.Mul"(%8666, %8665) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%8668 = torch.operator "onnx.MatMul"(%8667, %954) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8669 = torch.operator "onnx.Add"(%371, %8668) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>

// --- gated residual on the [?,4096,3072] tensor: %8673 = %8641 + %8319[:, None, :] * %8669 ---
%8670 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8671 = torch.operator "onnx.Unsqueeze"(%8319, %8670) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8672 = torch.operator "onnx.Mul"(%8671, %8669) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8673 = torch.operator "onnx.Add"(%8641, %8672) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>

// --- gated residual on the [?,512,3072] tensor: %8677 = %8292 + %8348[:, None, :] * %8637 ---
%8674 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8675 = torch.operator "onnx.Unsqueeze"(%8348, %8674) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8676 = torch.operator "onnx.Mul"(%8675, %8637) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%8677 = torch.operator "onnx.Add"(%8292, %8676) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>

// --- norm2_context parameters ---
%_2Ftransformer_blocks.172Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.17/norm2_context/Constant_attr__value" : tensor<3072xbf16>
// The operand and types of %8678 continue on the following line.
%8678 = torch_c.from_builtin_tensor
%_2Ftransformer_blocks.172Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// ^ Operand and types closing the %8678 conversion begun on the preceding line.
%_2Ftransformer_blocks.172Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.17/norm2_context/Constant_1_attr__value" : tensor<3072xbf16>
%8679 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.172Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// --- transformer_blocks.17 norm2_context: LayerNorm over the last axis, then modulation ---
// %8688 = LayerNorm(%8677) * (%8354[:, None, :] + Constant_7) + %8351[:, None, :]
// Constant_7 is a scalar bf16 held in a dense_resource blob (presumably 1.0 -- confirm).
// Its attribute is typed `tensor<bf16>` so it agrees with the `!torch.vtensor<[],bf16>`
// result type.
%8680 = torch.operator "onnx.LayerNormalization"(%8677, %8678, %8679) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8681 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8682 = torch.operator "onnx.Unsqueeze"(%8354, %8681) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8683 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8684 = torch.operator "onnx.Add"(%8682, %8683) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%8685 = torch.operator "onnx.Mul"(%8680, %8684) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8686 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8687 = torch.operator "onnx.Unsqueeze"(%8351, %8686) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8688 = torch.operator "onnx.Add"(%8685, %8687) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// The second operand and the types of %8689 continue on the following line.
%8689 = torch.operator "onnx.MatMul"(%8688,
%955) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// ^ Weight operand and types closing the %8689 MatMul begun on the preceding line (3072 -> 12288).
%8690 = torch.operator "onnx.Add"(%372, %8689) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>

// --- transformer_blocks.17 ff_context_net.0: elementwise activation on x = %8690 ---
//   %8701 = x * (c2 + tanh(c1 * (x + c0 * x^3)))
// c0..c2 are scalar bf16 constants stored in dense_resource blobs not visible here. The
// expression has the shape of tanh-approximated GELU (presumably c0 = 0.044715,
// c1 = sqrt(2/pi), c2 = 1 -- confirm against the resources).
// Each scalar attribute is typed `tensor<bf16>` so it agrees with its
// `!torch.vtensor<[],bf16>` result type.
%8691 = torch.operator "onnx.Mul"(%8690, %8690) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8692 = torch.operator "onnx.Mul"(%8690, %8691) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8693 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8694 = torch.operator "onnx.Mul"(%8693, %8692) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8695 = torch.operator "onnx.Add"(%8690, %8694) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8696 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8697 = torch.operator "onnx.Mul"(%8696, %8695) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8698 = torch.operator "onnx.Tanh"(%8697) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8699 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8700 = torch.operator "onnx.Add"(%8699, %8698) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// The result type of %8701 is on the following line.
%8701 = torch.operator "onnx.Mul"(%8690, %8700) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) ->
!torch.vtensor<[?,512,12288],bf16>
// ^ Result type closing the %8701 multiply begun on the preceding line.

// --- transformer_blocks.17 ff_context output: scale, project 12288 -> 3072, bias ---
// %8702 is a scalar bf16 constant held in a dense_resource blob (presumably 0.5, the
// final GELU factor -- confirm). Its attribute is typed `tensor<bf16>` so it agrees
// with the `!torch.vtensor<[],bf16>` result type.
%8702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8703 = torch.operator "onnx.Mul"(%8702, %8701) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%8704 = torch.operator "onnx.MatMul"(%8703, %956) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8705 = torch.operator "onnx.Add"(%373, %8704) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>

// --- gated residual: %8709 = %8677 + %8357[:, None, :] * %8705 ---
%8706 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.17_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8707 = torch.operator "onnx.Unsqueeze"(%8357, %8706) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8708 = torch.operator "onnx.Mul"(%8707, %8705) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8709 = torch.operator "onnx.Add"(%8677, %8708) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>

// --- transformer_blocks.18 norm1: linear layer producing a [1,18432] vector ---
// Gemm with transB = 1: %1285 [1,3072] x %374^T [3072,18432] + %375 [18432].
%8710 = torch.operator "onnx.Gemm"(%1285, %374, %375) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16>
%8711 = torch.operator "onnx.Shape"(%8710) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64>
// %8712 is used below as the index picked out of the shape vector; its value is not visible here.
%8712 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The remaining operand/result types of %8713 continue on the following line.
%8713 = torch.operator "onnx.Gather"(%8711, %8712) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>,
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// ^ Remaining types of the %8713 Gather begun on the preceding line.

// --- transformer_blocks.18 norm1: chunk size ---
// %8718 = (%8713 + Constant_2) / Constant_3, all si64. Constant values are in
// dense_resource blobs not visible here (this looks like the ceil-divide an exported
// `chunk` produces -- confirm).
%8714 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8715 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8716 = torch.operator "onnx.Add"(%8713, %8715) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8717 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8718 = torch.operator "onnx.Div"(%8716, %8717) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>

// --- consecutive slices of %8710 ---
// onnx.Slice operands here are (data, starts, ends, axes); axes reuses %8712.
// Each boundary is %8718 times a constant; every slice starts where the previous one ended.
//   %8721 = %8710[ %8714 : %8720 ]
//   %8724 = %8710[ %8720 : %8723 ]
%8719 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8720 = torch.operator "onnx.Mul"(%8718, %8719) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8721 = torch.operator "onnx.Slice"(%8710, %8714, %8720, %8712) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%8722 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8723 = torch.operator "onnx.Mul"(%8718, %8722) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8724 = torch.operator "onnx.Slice"(%8710, %8720, %8723, %8712) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// The attribute value and types of %8725 continue on the following line.
%8725 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.18_norm1_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ^ Attribute value and types closing the %8725 constant begun on the preceding line.

// --- transformer_blocks.18 norm1: further consecutive slices of %8710 ---
// onnx.Slice operands here are (data, starts, ends, axes); axes is %8712 throughout.
// Each end boundary is %8718 times a constant and becomes the next slice's start:
//   %8727 = %8710[ %8723 : %8726 ]
//   %8730 = %8710[ %8726 : %8729 ]
//   %8733 = %8710[ %8729 : %8732 ]
//   %8736 = %8710[ %8732 : %8735 ]
%8726 = torch.operator "onnx.Mul"(%8718, %8725) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8727 = torch.operator "onnx.Slice"(%8710, %8723, %8726, %8712) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%8728 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8729 = torch.operator "onnx.Mul"(%8718, %8728) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8730 = torch.operator "onnx.Slice"(%8710, %8726, %8729, %8712) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%8731 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8732 = torch.operator "onnx.Mul"(%8718, %8731) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8733 = torch.operator "onnx.Slice"(%8710, %8729, %8732, %8712) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%8734 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8735 = torch.operator "onnx.Mul"(%8718, %8734) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// The result type of %8736 is on the following line.
%8736 = torch.operator "onnx.Slice"(%8710, %8732, %8735, %8712) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) ->
!torch.vtensor<[?,?],bf16>
// ^ Result type closing the %8736 slice begun on the preceding line.

// --- transformer_blocks.18 norm1.norm parameters: load globals, convert to torch tensors ---
%_2Ftransformer_blocks.182Fnorm12Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.18/norm1/norm/Constant_attr__value" : tensor<3072xbf16>
%8737 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.182Fnorm12Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.182Fnorm12Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.18/norm1/norm/Constant_1_attr__value" : tensor<3072xbf16>
%8738 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.182Fnorm12Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// --- LayerNorm over the last axis of %8673, then modulation by two slices of the norm1 vector ---
// %8744 = LayerNorm(%8673) * (%8724[:, None, :] + Constant_11); %8721[:, None, :] is added next.
// Constant_11 is a scalar bf16 held in a dense_resource blob (presumably 1.0 -- confirm).
// Its attribute is typed `tensor<bf16>` so it agrees with the `!torch.vtensor<[],bf16>`
// result type.
%8739 = torch.operator "onnx.LayerNormalization"(%8673, %8737, %8738) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8740 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8741 = torch.operator "onnx.Unsqueeze"(%8724, %8740) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8742 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8743 = torch.operator "onnx.Add"(%8741, %8742) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%8744 = torch.operator "onnx.Mul"(%8739, %8743) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8745 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The remaining operand/result types of %8746 continue on the following line.
%8746 = torch.operator "onnx.Unsqueeze"(%8721, %8745) : (!torch.vtensor<[?,?],bf16>,
!torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// ^ Remaining types of the %8746 Unsqueeze begun on the preceding line.

// Adds the unsqueezed slice %8746 onto the scaled LayerNorm output %8744.
%8747 = torch.operator "onnx.Add"(%8744, %8746) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>

// --- transformer_blocks.18 norm1_context: linear layer producing a [1,18432] vector ---
// Gemm with transB = 1: %1285 [1,3072] x %376^T [3072,18432] + %377 [18432].
%8748 = torch.operator "onnx.Gemm"(%1285, %376, %377) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[18432,3072],bf16>, !torch.vtensor<[18432],bf16>) -> !torch.vtensor<[1,18432],bf16>
%8749 = torch.operator "onnx.Shape"(%8748) : (!torch.vtensor<[1,18432],bf16>) -> !torch.vtensor<[2],si64>
// %8750 is the index picked out of the shape vector here and is reused later as a Slice
// `axes` operand; its value is in a dense_resource blob not visible here.
%8750 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8751 = torch.operator "onnx.Gather"(%8749, %8750) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>

// --- chunk size: %8756 = (%8751 + Constant_2) / Constant_3, all si64 ---
// (looks like the ceil-divide an exported `chunk` produces -- confirm against the constants)
%8752 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8753 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8754 = torch.operator "onnx.Add"(%8751, %8753) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8755 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8756 = torch.operator "onnx.Div"(%8754, %8755) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8757 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The defining op of %8758 continues on the following line.
%8758 =
torch.operator "onnx.Mul"(%8756, %8757) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// ^ Body of %8758 (its `%8758 =` sits at the end of the preceding line): first slice boundary.

// --- transformer_blocks.18 norm1_context: consecutive slices of %8748 ---
// onnx.Slice operands here are (data, starts, ends, axes); axes is %8750 throughout.
// Each end boundary is %8756 times a constant and becomes the next slice's start:
//   %8759 = %8748[ %8752 : %8758 ]
//   %8762 = %8748[ %8758 : %8761 ]
//   %8765 = %8748[ %8761 : %8764 ]
//   %8768 = %8748[ %8764 : %8767 ]
%8759 = torch.operator "onnx.Slice"(%8748, %8752, %8758, %8750) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%8760 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8761 = torch.operator "onnx.Mul"(%8756, %8760) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8762 = torch.operator "onnx.Slice"(%8748, %8758, %8761, %8750) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%8763 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8764 = torch.operator "onnx.Mul"(%8756, %8763) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8765 = torch.operator "onnx.Slice"(%8748, %8761, %8764, %8750) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%8766 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8767 = torch.operator "onnx.Mul"(%8756, %8766) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8768 = torch.operator "onnx.Slice"(%8748, %8764, %8767, %8750) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// The attribute value and types of %8769 continue on the following line.
%8769 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.18_norm1_context_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ^ Attribute value and types closing the %8769 constant begun on the preceding line.

// --- transformer_blocks.18 norm1_context: last two consecutive slices of %8748 ---
// onnx.Slice operands here are (data, starts, ends, axes); axes is %8750.
//   %8771 = %8748[ %8767 : %8770 ]
//   %8774 = %8748[ %8770 : %8773 ]
%8770 = torch.operator "onnx.Mul"(%8756, %8769) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8771 = torch.operator "onnx.Slice"(%8748, %8767, %8770, %8750) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%8772 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8773 = torch.operator "onnx.Mul"(%8756, %8772) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8774 = torch.operator "onnx.Slice"(%8748, %8770, %8773, %8750) : (!torch.vtensor<[1,18432],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>

// --- norm1_context.norm parameters: load globals, convert to torch tensors ---
%_2Ftransformer_blocks.182Fnorm1_context2Fnorm2FConstant_attr__value = util.global.load @"/transformer_blocks.18/norm1_context/norm/Constant_attr__value" : tensor<3072xbf16>
%8775 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.182Fnorm1_context2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.182Fnorm1_context2Fnorm2FConstant_1_attr__value = util.global.load @"/transformer_blocks.18/norm1_context/norm/Constant_1_attr__value" : tensor<3072xbf16>
%8776 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.182Fnorm1_context2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// LayerNorm over the last axis of the [?,512,3072] tensor %8709.
%8777 = torch.operator "onnx.LayerNormalization"(%8709, %8775, %8776) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// The attribute value and types of %8778 continue on the following line.
%8778 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.18_norm1_context_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ^ Attribute value and types closing the %8778 constant begun on the preceding line.

// --- transformer_blocks.18 norm1_context: modulation of the LayerNorm output %8777 ---
// %8785 = %8777 * (%8762[:, None, :] + Constant_11) + %8759[:, None, :]
// Constant_11 is a scalar bf16 held in a dense_resource blob (presumably 1.0 -- confirm).
// Scalar attributes are typed `tensor<bf16>` / `tensor<si64>` so they agree with the
// element type of their `!torch.vtensor<[],...>` result.
%8779 = torch.operator "onnx.Unsqueeze"(%8762, %8778) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8780 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_11_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%8781 = torch.operator "onnx.Add"(%8779, %8780) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%8782 = torch.operator "onnx.Mul"(%8777, %8781) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_norm1_context_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8784 = torch.operator "onnx.Unsqueeze"(%8759, %8783) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%8785 = torch.operator "onnx.Add"(%8782, %8784) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>

// --- transformer_blocks.18 attn: one dimension of %8785's shape ---
// Which entry is gathered depends on %8787, whose value is not visible here
// (presumably index 0, the dynamic leading dimension -- confirm).
%8786 = torch.operator "onnx.Shape"(%8785) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64>
%8787 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8788 = torch.operator "onnx.Gather"(%8786, %8787) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// --- first of three 3072x3072 projections of %8747 (weight %957, bias %380) ---
%8789 = torch.operator "onnx.MatMul"(%8747, %957) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8790 = torch.operator "onnx.Add"(%380, %8789) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// The defining op of %8791 continues on the following line.
%8791 =
torch.operator "onnx.MatMul"(%8747, %958) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// ^ Body of %8791 (its `%8791 =` sits at the end of the preceding line).

// --- transformer_blocks.18 attn: remaining 3072x3072 projections of %8747 ---
// weight %958 + bias %381 -> %8792; weight %959 + bias %382 -> %8794.
%8792 = torch.operator "onnx.Add"(%381, %8791) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8793 = torch.operator "onnx.MatMul"(%8747, %959) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%8794 = torch.operator "onnx.Add"(%382, %8793) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>

// --- %8799 = (one dimension of %8792's shape) / Constant_2, in si64 ---
// Index and divisor are scalar si64 constants held in dense_resource blobs not visible here
// (presumably the last dimension, 3072, divided by the head count -- confirm).
// Their attributes are typed `tensor<si64>` so they agree with the `!torch.vtensor<[],si64>`
// result types.
%8795 = torch.operator "onnx.Shape"(%8792) : (!torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[3],si64>
%8796 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8797 = torch.operator "onnx.Gather"(%8795, %8796) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8798 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8799 = torch.operator "onnx.Div"(%8797, %8798) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Two back-to-back Casts to si64 of an si64 value: operand and result types are identical.
%8800 = torch.operator "onnx.Cast"(%8799) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8801 = torch.operator "onnx.Cast"(%8800) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// --- first entry of a 4-element shape vector: %8788 wrapped as a 1-element tensor ---
%8802 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13344_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8803 = torch.operator "onnx.Unsqueeze"(%8788, %8802) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// The attribute value and types of %8804 continue on the following line.
%8804 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.18_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ^ Attribute value and types closing the %8804 constant begun on the preceding line.

// --- transformer_blocks.18 attn: 4-element si64 shape vectors ---
// Both vectors have the same layout: [ %8788, constant, constant, %8801 ], concatenated on axis 0.
// The two middle constants are in dense_resource blobs not visible here; the consumers of
// %8808 and %8815 are outside this span.
//   %8808 = [ %8803, %8804, %8805, %8807 ]
%8805 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8806 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13348_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8807 = torch.operator "onnx.Unsqueeze"(%8801, %8806) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8808 = torch.operator "onnx.Concat"(%8803, %8804, %8805, %8807) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>

//   %8815 = [ %8810, %8811, %8812, %8814 ]
%8809 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13351_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8810 = torch.operator "onnx.Unsqueeze"(%8788, %8809) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8811 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8812 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8813 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13355_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8814 = torch.operator "onnx.Unsqueeze"(%8801, %8813) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8815 = torch.operator "onnx.Concat"(%8810, %8811, %8812, %8814) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// The `=` and defining op of %8816 continue on the following line.
%8816
= torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13358_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8817 = torch.operator "onnx.Unsqueeze"(%8788, %8816) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8818 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8819 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8820 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13362_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8821 = torch.operator "onnx.Unsqueeze"(%8801, %8820) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8822 = torch.operator "onnx.Concat"(%8817, %8818, %8819, %8821) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8823 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13365_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8824 = torch.operator "onnx.Unsqueeze"(%8788, %8823) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8825 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8826 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_10_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8827 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13369_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8828 = torch.operator 
"onnx.Unsqueeze"(%8801, %8827) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8829 = torch.operator "onnx.Concat"(%8824, %8825, %8826, %8828) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8830 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13372_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8831 = torch.operator "onnx.Unsqueeze"(%8788, %8830) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8832 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_11_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8833 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8834 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13376_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8835 = torch.operator "onnx.Unsqueeze"(%8801, %8834) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8836 = torch.operator "onnx.Concat"(%8831, %8832, %8833, %8835) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8837 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13379_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8838 = torch.operator "onnx.Unsqueeze"(%8788, %8837) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8839 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8840 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_14_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8841 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13383_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8842 = torch.operator "onnx.Unsqueeze"(%8801, %8841) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8843 = torch.operator "onnx.Concat"(%8838, %8839, %8840, %8842) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %8844 = torch.operator "onnx.Reshape"(%8790, %8808) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8845 = torch.operator "onnx.Transpose"(%8844) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8846 = torch.operator "onnx.Reshape"(%8792, %8815) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8847 = torch.operator "onnx.Transpose"(%8846) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8848 = torch.operator "onnx.Reshape"(%8794, %8822) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8849 = torch.operator "onnx.Transpose"(%8848) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8850 = torch.operator "onnx.Cast"(%8845) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8851 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.18_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// The line above is the tail of the %8851 constant; its head is on the preceding line.
// Fix: rank-0 constants must spell out their element type. `tensor<f32>` matches the
// `!torch.vtensor<[],f32>` result type of each constant; a bare `tensor` is not a valid type.
//
// norm_q chain over %8845 (constant values live in dense resources not visible here):
//   %8858 = C2 / sqrt(mean(pow(%8850, C0), axis = -1, keepdims) + C1)   with C0=%8851, C1=%8854, C2=%8857
//   %8862 = bf16(f32(%8845) * %8858) * %378
// %8850 (defined before this span) and %8859 are both f32 casts of %8845; kept as exported.
%8852 = torch.operator "onnx.Pow"(%8850, %8851) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8853 = torch.operator "onnx.ReduceMean"(%8852) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8854 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8855 = torch.operator "onnx.Add"(%8853, %8854) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8856 = torch.operator "onnx.Sqrt"(%8855) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8857 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8858 = torch.operator "onnx.Div"(%8857, %8856) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8859 = torch.operator "onnx.Cast"(%8845) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8860 = torch.operator "onnx.Mul"(%8859, %8858) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8861 = torch.operator "onnx.Cast"(%8860) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%8862 = torch.operator "onnx.Mul"(%8861, %378) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Start of the norm_k chain over %8847; the %8864 constant continues on the following line.
%8863 = torch.operator "onnx.Cast"(%8847) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8864 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.18_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// The line above is the tail of the %8864 constant; its head is on the preceding line.
// Fix: rank-0 constants must spell out their element type. `tensor<f32>` matches the
// `!torch.vtensor<[],f32>` result type of each constant; a bare `tensor` is not a valid type.
//
// norm_k chain over %8847 (constant values live in dense resources not visible here):
//   %8871 = C2 / sqrt(mean(pow(%8863, C0), axis = -1, keepdims) + C1)   with C0=%8864, C1=%8867, C2=%8870
//   %8875 = bf16(f32(%8847) * %8871) * %379
// %8863 (defined before this span) and %8872 are both f32 casts of %8847; kept as exported.
%8865 = torch.operator "onnx.Pow"(%8863, %8864) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8866 = torch.operator "onnx.ReduceMean"(%8865) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8867 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8868 = torch.operator "onnx.Add"(%8866, %8867) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8869 = torch.operator "onnx.Sqrt"(%8868) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8870 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8871 = torch.operator "onnx.Div"(%8870, %8869) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8872 = torch.operator "onnx.Cast"(%8847) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8873 = torch.operator "onnx.Mul"(%8872, %8871) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8874 = torch.operator "onnx.Cast"(%8873) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%8875 = torch.operator "onnx.Mul"(%8874, %379) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// First of three [?,512,3072] x [3072,3072] projections of %8785; the bias add %8877 continues on the following line.
%8876 = torch.operator "onnx.MatMul"(%8785, %960) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%8877 = torch.operator "onnx.Add"(%385, %8876) : (!torch.vtensor<[3072],bf16>,
!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8878 = torch.operator "onnx.MatMul"(%8785, %961) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8879 = torch.operator "onnx.Add"(%383, %8878) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8880 = torch.operator "onnx.MatMul"(%8785, %962) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8881 = torch.operator "onnx.Add"(%384, %8880) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %8882 = torch.operator "onnx.Reshape"(%8877, %8829) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8883 = torch.operator "onnx.Transpose"(%8882) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8884 = torch.operator "onnx.Reshape"(%8879, %8836) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8885 = torch.operator "onnx.Transpose"(%8884) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8886 = torch.operator "onnx.Reshape"(%8881, %8843) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8887 = torch.operator "onnx.Transpose"(%8886) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8888 = torch.operator "onnx.Cast"(%8883) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %8889 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.18_attn_norm_added_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// The line above is the tail of the %8889 constant; its head is on the preceding line.
// Fix: rank-0 constants must spell out their element type. `tensor<f32>` matches the
// `!torch.vtensor<[],f32>` result type of each constant; a bare `tensor` is not a valid type.
//
// norm_added_q chain over %8883 (constant values live in dense resources not visible here):
//   %8896 = C2 / sqrt(mean(pow(%8888, C0), axis = -1, keepdims) + C1)   with C0=%8889, C1=%8892, C2=%8895
//   %8900 = bf16(f32(%8883) * %8896) * %388
// %8888 (defined before this span) and %8897 are both f32 casts of %8883; kept as exported.
%8890 = torch.operator "onnx.Pow"(%8888, %8889) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8891 = torch.operator "onnx.ReduceMean"(%8890) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8892 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_norm_added_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8893 = torch.operator "onnx.Add"(%8891, %8892) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8894 = torch.operator "onnx.Sqrt"(%8893) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8895 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_norm_added_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8896 = torch.operator "onnx.Div"(%8895, %8894) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8897 = torch.operator "onnx.Cast"(%8883) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8898 = torch.operator "onnx.Mul"(%8897, %8896) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8899 = torch.operator "onnx.Cast"(%8898) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%8900 = torch.operator "onnx.Mul"(%8899, %388) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Start of the norm_added_k chain over %8885; the %8902 constant continues on the following line.
%8901 = torch.operator "onnx.Cast"(%8885) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8902 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.18_attn_norm_added_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// The line above is the tail of the %8902 constant; its head is on the preceding line.
// Fix: rank-0 constants must spell out their element type. `tensor<f32>` matches the
// `!torch.vtensor<[],f32>` result type of each constant; a bare `tensor` is not a valid type.
//
// norm_added_k chain over %8885 (constant values live in dense resources not visible here):
//   %8909 = C2 / sqrt(mean(pow(%8901, C0), axis = -1, keepdims) + C1)   with C0=%8902, C1=%8905, C2=%8908
//   %8913 = bf16(f32(%8885) * %8909) * %389
// %8901 (defined before this span) and %8910 are both f32 casts of %8885; kept as exported.
%8903 = torch.operator "onnx.Pow"(%8901, %8902) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8904 = torch.operator "onnx.ReduceMean"(%8903) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8905 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_norm_added_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8906 = torch.operator "onnx.Add"(%8904, %8905) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8907 = torch.operator "onnx.Sqrt"(%8906) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8908 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_norm_added_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%8909 = torch.operator "onnx.Div"(%8908, %8907) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%8910 = torch.operator "onnx.Cast"(%8885) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8911 = torch.operator "onnx.Mul"(%8910, %8909) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%8912 = torch.operator "onnx.Cast"(%8911) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%8913 = torch.operator "onnx.Mul"(%8912, %389) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Concatenate the "added" result with the main-stream result along axis 2.
// The attributes and types of %8915 continue on the following line.
%8914 = torch.operator "onnx.Concat"(%8900, %8862) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
%8915 = torch.operator "onnx.Concat"(%8913, %8875)
{torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// The line above is the tail of the %8915 concat; its head is on the preceding line.
// Fix: rank-0 index constants must spell out their element type. `tensor<si64>` matches the
// `!torch.vtensor<[],si64>` result type of each constant; a bare `tensor` is not a valid type.
%8916 = torch.operator "onnx.Concat"(%8887, %8849) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Three scalar extents are gathered from shape(%8914); the gather indices are dense-resource
// constants whose values are not visible here.
%8917 = torch.operator "onnx.Shape"(%8914) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8918 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_15_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8919 = torch.operator "onnx.Gather"(%8917, %8918) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8920 = torch.operator "onnx.Shape"(%8914) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8921 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_16_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8922 = torch.operator "onnx.Gather"(%8920, %8921) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8923 = torch.operator "onnx.Shape"(%8914) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8924 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_17_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8925 = torch.operator "onnx.Gather"(%8923, %8924) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Each gathered scalar is unsqueezed to rank 1; the %8928 constant continues on the following line.
%8926 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13468_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8927 = torch.operator "onnx.Unsqueeze"(%8919, %8926) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8928 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<_Constant_13470_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8929 = torch.operator "onnx.Unsqueeze"(%8922, %8928) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8930 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13472_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8931 = torch.operator "onnx.Unsqueeze"(%8925, %8930) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %8932 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8933 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8934 = torch.operator "onnx.Concat"(%8927, %8929, %8931, %8932, %8933) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %8935 = torch.operator "onnx.Reshape"(%8914, %8934) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8936 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_20_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %8937:2 = torch.operator "onnx.Split"(%8935, %8936) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %8938 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8939 = torch.operator "onnx.Squeeze"(%8937#0, %8938) : 
(!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8940 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8941 = torch.operator "onnx.Squeeze"(%8937#1, %8940) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8942 = torch.operator "onnx.Neg"(%8941) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8943 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_23_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8944 = torch.operator "onnx.Unsqueeze"(%8942, %8943) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8945 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_24_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8946 = torch.operator "onnx.Unsqueeze"(%8939, %8945) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8947 = torch.operator "onnx.Concat"(%8944, %8946) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8948 = torch.operator "onnx.Shape"(%8947) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %8949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_25_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8950 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8951 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_27_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The line above is the tail of the %8951 constant; its head is on the preceding line.
// Fix: the rank-0 index constant %8963 must spell out its element type. `tensor<si64>` matches
// its `!torch.vtensor<[],si64>` result type; a bare `tensor` is not a valid type.
//
// Take a 3-element slice of shape(%8947), append one more extent, and reshape the
// [..., 2] tensor %8947 back to rank 4.
%8952 = torch.operator "onnx.Slice"(%8948, %8950, %8951, %8949) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%8953 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_28_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8954 = torch.operator "onnx.Concat"(%8952, %8953) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%8955 = torch.operator "onnx.Reshape"(%8947, %8954) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// %8961 = bf16(f32(%8914) * %1455 + f32(%8955) * %1456), computed in f32.
// NOTE(review): this looks like a rotary-embedding application with %1455/%1456 as the
// cos/sin tables -- confirm against the exporting model.
%8956 = torch.operator "onnx.Cast"(%8914) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%8957 = torch.operator "onnx.Mul"(%8956, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%8958 = torch.operator "onnx.Cast"(%8955) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%8959 = torch.operator "onnx.Mul"(%8958, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%8960 = torch.operator "onnx.Add"(%8957, %8959) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%8961 = torch.operator "onnx.Cast"(%8960) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// Start of the shape gathers on %8915; the %8964 gather continues on the following line.
%8962 = torch.operator "onnx.Shape"(%8915) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8963 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_29_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8964 = torch.operator "onnx.Gather"(%8962,
%8963) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// The line above is the tail of the %8964 gather; its head is on the preceding line.
// Fix: rank-0 index constants must spell out their element type. `tensor<si64>` matches the
// `!torch.vtensor<[],si64>` result type of each constant; a bare `tensor` is not a valid type.
//
// Two more scalar extents are gathered from shape(%8915); the gather indices are
// dense-resource constants whose values are not visible here.
%8965 = torch.operator "onnx.Shape"(%8915) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8966 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_30_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8967 = torch.operator "onnx.Gather"(%8965, %8966) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%8968 = torch.operator "onnx.Shape"(%8915) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%8969 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_31_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%8970 = torch.operator "onnx.Gather"(%8968, %8969) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Unsqueeze the three gathered scalars (%8964, %8967, %8970) to rank 1.
%8971 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13513_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8972 = torch.operator "onnx.Unsqueeze"(%8964, %8971) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8973 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13515_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8974 = torch.operator "onnx.Unsqueeze"(%8967, %8973) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%8975 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13517_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%8976 = torch.operator "onnx.Unsqueeze"(%8970, %8975) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// The %8977 constant continues on the following line.
%8977 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__transformer_blocks.18_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8979 = torch.operator "onnx.Concat"(%8972, %8974, %8976, %8977, %8978) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %8980 = torch.operator "onnx.Reshape"(%8915, %8979) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8981 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_34_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %8982:2 = torch.operator "onnx.Split"(%8980, %8981) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %8983 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8984 = torch.operator "onnx.Squeeze"(%8982#0, %8983) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8986 = torch.operator "onnx.Squeeze"(%8982#1, %8985) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %8987 = torch.operator "onnx.Neg"(%8986) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %8988 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.18_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8989 = torch.operator "onnx.Unsqueeze"(%8987, %8988) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8990 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8991 = torch.operator "onnx.Unsqueeze"(%8984, %8990) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %8992 = torch.operator "onnx.Concat"(%8989, %8991) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %8993 = torch.operator "onnx.Shape"(%8992) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %8994 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_39_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8995 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_40_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8996 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8997 = torch.operator "onnx.Slice"(%8993, %8995, %8996, %8994) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %8998 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_42_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %8999 = torch.operator "onnx.Concat"(%8997, %8998) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9000 = 
torch.operator "onnx.Reshape"(%8992, %8999) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9001 = torch.operator "onnx.Cast"(%8915) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %9002 = torch.operator "onnx.Mul"(%9001, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9003 = torch.operator "onnx.Cast"(%9000) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9004 = torch.operator "onnx.Mul"(%9003, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9005 = torch.operator "onnx.Add"(%9002, %9004) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9006 = torch.operator "onnx.Cast"(%9005) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %9007 = torch.operator "onnx.Shape"(%8961) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %9008 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_43_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9009 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_44_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9010 = torch.operator "onnx.Slice"(%9007, %9008, %9009) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9011 = torch.operator "onnx.Cast"(%9010) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %9012 = torch.operator "onnx.Sqrt"(%9011) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9013 = torch.operator "onnx.Constant"() {torch.onnx.value 
= dense_resource<__transformer_blocks.18_attn_Constant_45_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
// The line above is the tail of the %9013 constant; its head is on the preceding line.
// Fix: the rank-0 constant %9026 at the end of this span must spell out its element type.
// Its result type on the following line is `!torch.vtensor<[],si64>`, so the attribute is
// `tensor<si64>`; a bare `tensor` is not a valid type.
//
// Scale factor: %9016 = bf16(%9013 / f32(%9012)).
%9014 = torch.operator "onnx.Cast"(%9012) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32>
%9015 = torch.operator "onnx.Div"(%9013, %9014) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
%9016 = torch.operator "onnx.Cast"(%9015) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>
// Swap the last two axes of %9006 so it can be the right-hand matmul operand.
%9017 = torch.operator "onnx.Transpose"(%9006) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Both matmul operands are multiplied by sqrt(%9016). %9018 and %9020 are the same
// Sqrt(%9016) emitted twice by the exporter; kept as exported.
%9018 = torch.operator "onnx.Sqrt"(%9016) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%9019 = torch.operator "onnx.Mul"(%8961, %9018) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%9020 = torch.operator "onnx.Sqrt"(%9016) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%9021 = torch.operator "onnx.Mul"(%9017, %9020) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Scores -> softmax over the last axis -> weighted sum with %8916.
%9022 = torch.operator "onnx.MatMul"(%9019, %9021) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%9023 = torch.operator "onnx.Softmax"(%9022) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%9024 = torch.operator "onnx.MatMul"(%9023, %8916) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
// Swap axes 1 and 2 of the result.
%9025 = torch.operator "onnx.Transpose"(%9024) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// The function type of %9026 continues on the following line.
%9026 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_46_attr__value> : tensor<si64>} :
() -> !torch.vtensor<[],si64> %9027 = torch.operator "onnx.Mul"(%8801, %9026) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9028 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13570_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9029 = torch.operator "onnx.Unsqueeze"(%8788, %9028) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9030 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_47_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9031 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13573_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9032 = torch.operator "onnx.Unsqueeze"(%9027, %9031) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9033 = torch.operator "onnx.Concat"(%9029, %9030, %9032) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %9034 = torch.operator "onnx.Reshape"(%9025, %9033) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %9035 = torch.operator "onnx.Cast"(%9034) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %9036 = torch.operator "onnx.Shape"(%8785) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %9037 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_48_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9038 = torch.operator "onnx.Gather"(%9036, %9037) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9039 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__transformer_blocks.18_attn_Constant_49_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9040 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_50_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9041 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_51_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9042 = torch.operator "onnx.Unsqueeze"(%9038, %9041) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9043 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_52_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9044 = torch.operator "onnx.Slice"(%9035, %9040, %9042, %9039, %9043) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %9045 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_53_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9046 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_54_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9047 = torch.operator "onnx.Unsqueeze"(%9038, %9046) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9048 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_55_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9049 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_attn_Constant_56_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9050 = torch.operator "onnx.Slice"(%9035, %9047, %9048, %9045, %9049) : 
(!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16>
// ---- transformer_blocks.18: project the two attention slices (3072 -> 3072) and add biases ----
%9051 = torch.operator "onnx.MatMul"(%9050, %963) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%9052 = torch.operator "onnx.Add"(%386, %9051) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%9053 = torch.operator "onnx.MatMul"(%9044, %964) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%9054 = torch.operator "onnx.Add"(%387, %9053) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
// Gated residual: %9058 = %8673 + unsqueeze(%8727) * %9052.
%9055 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9056 = torch.operator "onnx.Unsqueeze"(%8727, %9055) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%9057 = torch.operator "onnx.Mul"(%9056, %9052) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16>
%9058 = torch.operator "onnx.Add"(%8673, %9057) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// norm2: LayerNormalization over the last axis; its two 3072-element operands are loaded from globals.
%_2Ftransformer_blocks.182Fnorm22FConstant_attr__value = util.global.load @"/transformer_blocks.18/norm2/Constant_attr__value" : tensor<3072xbf16>
%9059 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.182Fnorm22FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Ftransformer_blocks.182Fnorm22FConstant_1_attr__value = util.global.load @"/transformer_blocks.18/norm2/Constant_1_attr__value" : tensor<3072xbf16>
%9060 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.182Fnorm22FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%9061 = torch.operator "onnx.LayerNormalization"(%9058, %9059, %9060) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// Modulation: %9069 = %9061 * (unsqueeze(%8733) + c) + unsqueeze(%8730).
%9062 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9063 = torch.operator "onnx.Unsqueeze"(%8733, %9062) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constants carry an explicit element type (tensor<bf16>) matching their
// !torch.vtensor<[],bf16> result; a bare `tensor` is not a valid builtin type.
%9064 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9065 = torch.operator "onnx.Add"(%9063, %9064) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%9066 = torch.operator "onnx.Mul"(%9061, %9065) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
%9067 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9068 = torch.operator "onnx.Unsqueeze"(%8730, %9067) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%9069 = torch.operator "onnx.Add"(%9066, %9068) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4096,3072],bf16>
// ff_net: 3072 -> 12288 projection plus bias.
%9070 = torch.operator "onnx.MatMul"(%9069, %965) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9071 = torch.operator "onnx.Add"(%390, %9070) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// Activation: c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))) with x = %9071.
// NOTE(review): has the shape of tanh-approximated GELU; c0..c3 live in resource blobs - confirm.
%9072 = torch.operator "onnx.Mul"(%9071, %9071) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9073 = torch.operator "onnx.Mul"(%9071, %9072) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9074 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_ff_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9075 = torch.operator "onnx.Mul"(%9074, %9073) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9076 = torch.operator "onnx.Add"(%9071, %9075) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9077 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_ff_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9078 = torch.operator "onnx.Mul"(%9077, %9076) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9079 = torch.operator "onnx.Tanh"(%9078) : (!torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9080 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_ff_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9081 = torch.operator "onnx.Add"(%9080, %9079) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9082 = torch.operator "onnx.Mul"(%9071, %9081) : (!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
%9083 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_ff_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9084 = torch.operator "onnx.Mul"(%9083, %9082) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4096,12288],bf16>) -> !torch.vtensor<[?,4096,12288],bf16>
// ff_net: 12288 -> 3072 projection (its type signature continues on the next line of the file).
%9085 = torch.operator "onnx.MatMul"(%9084, %966) :
(!torch.vtensor<[?,4096,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %9086 = torch.operator "onnx.Add"(%391, %9085) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %9087 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9088 = torch.operator "onnx.Unsqueeze"(%8736, %9087) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9089 = torch.operator "onnx.Mul"(%9088, %9086) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %9090 = torch.operator "onnx.Add"(%9058, %9089) : (!torch.vtensor<[?,4096,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4096,3072],bf16> %9091 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9092 = torch.operator "onnx.Unsqueeze"(%8765, %9091) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9093 = torch.operator "onnx.Mul"(%9092, %9054) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,?,3072],bf16> %9094 = torch.operator "onnx.Add"(%8709, %9093) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,?,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %_2Ftransformer_blocks.182Fnorm2_context2FConstant_attr__value = util.global.load @"/transformer_blocks.18/norm2_context/Constant_attr__value" : tensor<3072xbf16> %9095 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.182Fnorm2_context2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Ftransformer_blocks.182Fnorm2_context2FConstant_1_attr__value = util.global.load @"/transformer_blocks.18/norm2_context/Constant_1_attr__value" : 
tensor<3072xbf16>
// ---- transformer_blocks.18 context stream: norm2_context, modulation, ff_context_net ----
%9096 = torch_c.from_builtin_tensor %_2Ftransformer_blocks.182Fnorm2_context2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// LayerNormalization of %9094 over the last axis.
%9097 = torch.operator "onnx.LayerNormalization"(%9094, %9095, %9096) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// Modulation: %9105 = %9097 * (unsqueeze(%8771) + c) + unsqueeze(%8768).
%9098 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9099 = torch.operator "onnx.Unsqueeze"(%8771, %9098) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constants carry an explicit element type (tensor<bf16>) matching their
// !torch.vtensor<[],bf16> result; a bare `tensor` is not a valid builtin type.
%9100 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_7_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9101 = torch.operator "onnx.Add"(%9099, %9100) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%9102 = torch.operator "onnx.Mul"(%9097, %9101) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
%9103 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9104 = torch.operator "onnx.Unsqueeze"(%8768, %9103) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%9105 = torch.operator "onnx.Add"(%9102, %9104) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,512,3072],bf16>
// ff_context_net: 3072 -> 12288 projection plus bias.
%9106 = torch.operator "onnx.MatMul"(%9105, %967) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9107 = torch.operator "onnx.Add"(%392, %9106) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
// Activation: c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))) with x = %9107.
// NOTE(review): has the shape of tanh-approximated GELU; c0..c3 live in resource blobs - confirm.
%9108 = torch.operator "onnx.Mul"(%9107, %9107) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9109 = torch.operator "onnx.Mul"(%9107, %9108) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9110 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_ff_context_net.0_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9111 = torch.operator "onnx.Mul"(%9110, %9109) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9112 = torch.operator "onnx.Add"(%9107, %9111) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9113 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_ff_context_net.0_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9114 = torch.operator "onnx.Mul"(%9113, %9112) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9115 = torch.operator "onnx.Tanh"(%9114) : (!torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9116 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_ff_context_net.0_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9117 = torch.operator "onnx.Add"(%9116, %9115) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9118 = torch.operator "onnx.Mul"(%9107, %9117) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16>
%9119 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_ff_context_net.0_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9120 =
torch.operator "onnx.Mul"(%9119, %9118) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,512,12288],bf16>) -> !torch.vtensor<[?,512,12288],bf16> %9121 = torch.operator "onnx.MatMul"(%9120, %968) : (!torch.vtensor<[?,512,12288],bf16>, !torch.vtensor<[12288,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %9122 = torch.operator "onnx.Add"(%393, %9121) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %9123 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__transformer_blocks.18_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9124 = torch.operator "onnx.Unsqueeze"(%8774, %9123) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9125 = torch.operator "onnx.Mul"(%9124, %9122) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %9126 = torch.operator "onnx.Add"(%9094, %9125) : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[?,512,3072],bf16> %9127 = torch.operator "onnx.Concat"(%9126, %9090) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[?,512,3072],bf16>, !torch.vtensor<[?,4096,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9128 = torch.operator "onnx.Gemm"(%1285, %394, %395) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %9129 = torch.operator "onnx.Shape"(%9128) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %9130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9131 = torch.operator "onnx.Gather"(%9129, %9130) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %9132 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9133 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9134 = torch.operator "onnx.Add"(%9131, %9133) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9135 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9136 = torch.operator "onnx.Div"(%9134, %9135) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9137 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9138 = torch.operator "onnx.Mul"(%9136, %9137) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9139 = torch.operator "onnx.Slice"(%9128, %9132, %9138, %9130) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %9140 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9141 = torch.operator "onnx.Mul"(%9136, %9140) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9142 = torch.operator "onnx.Slice"(%9128, %9138, %9141, %9130) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %9143 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.0_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ---- single_transformer_blocks.0: third slice of %9128, norm, modulation, MLP head ----
// Slice bounds are %9141 .. %9136 * %9143 along the axes given by %9130.
%9144 = torch.operator "onnx.Mul"(%9136, %9143) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%9145 = torch.operator "onnx.Slice"(%9128, %9141, %9144, %9130) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// LayerNormalization of %9127 over the last axis; its two 3072-element operands are loaded from globals.
%_2Fsingle_transformer_blocks.02Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.0/norm/norm/Constant_attr__value" : tensor<3072xbf16>
%9146 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.02Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Fsingle_transformer_blocks.02Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.0/norm/norm/Constant_1_attr__value" : tensor<3072xbf16>
%9147 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.02Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%9148 = torch.operator "onnx.LayerNormalization"(%9127, %9146, %9147) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// Modulation: %9156 = %9148 * (unsqueeze(%9142) + c) + unsqueeze(%9139).
%9149 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9150 = torch.operator "onnx.Unsqueeze"(%9142, %9149) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constants carry an explicit element type (tensor<bf16>) matching their
// !torch.vtensor<[],bf16> result; a bare `tensor` is not a valid builtin type.
%9151 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9152 = torch.operator "onnx.Add"(%9150, %9151) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%9153 = torch.operator "onnx.Mul"(%9148, %9152) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9154 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9155 = torch.operator "onnx.Unsqueeze"(%9139, %9154) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%9156 = torch.operator "onnx.Add"(%9153, %9155) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// MLP branch: 3072 -> 12288 projection plus bias.
%9157 = torch.operator "onnx.MatMul"(%9156, %969) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9158 = torch.operator "onnx.Add"(%396, %9157) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// Start of the activation: x + c0 * x^3 with x = %9158, then scaled by c1 (the Mul continues on the next file line).
// NOTE(review): has the shape of tanh-approximated GELU; constants live in resource blobs - confirm.
%9159 = torch.operator "onnx.Mul"(%9158, %9158) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9160 = torch.operator "onnx.Mul"(%9158, %9159) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9161 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9162 = torch.operator "onnx.Mul"(%9161, %9160) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9163 = torch.operator "onnx.Add"(%9158, %9162) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9164 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9165 =
torch.operator "onnx.Mul"(%9164, %9163) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// ---- single_transformer_blocks.0: rest of the MLP activation, then q/k/v projections ----
// %9171 = c3 * %9158 * (c2 + tanh(%9165)).
// Rank-0 constants carry an explicit element type (tensor<bf16> / tensor<si64>) matching
// their !torch.vtensor<[],...> result; a bare `tensor` is not a valid builtin type.
%9166 = torch.operator "onnx.Tanh"(%9165) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9167 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9168 = torch.operator "onnx.Add"(%9167, %9166) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9169 = torch.operator "onnx.Mul"(%9158, %9168) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9170 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9171 = torch.operator "onnx.Mul"(%9170, %9169) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// Read one dimension of %9156; the gather index lives in a resource blob.
%9172 = torch.operator "onnx.Shape"(%9156) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%9173 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9174 = torch.operator "onnx.Gather"(%9172, %9173) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three 3072 -> 3072 projections of %9156, each followed by a bias add.
%9175 = torch.operator "onnx.MatMul"(%9156, %970) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9176 = torch.operator "onnx.Add"(%400, %9175) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9177 = torch.operator "onnx.MatMul"(%9156, %971) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9178 = torch.operator "onnx.Add"(%401, %9177) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9179 = torch.operator "onnx.MatMul"(%9156, %972) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9180 = torch.operator "onnx.Add"(%402, %9179) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// Take one dimension of %9178 and integer-divide it by a constant.
%9181 = torch.operator "onnx.Shape"(%9178) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%9182 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9183 = torch.operator "onnx.Gather"(%9181, %9182) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9184 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9185 = torch.operator "onnx.Div"(%9183, %9184) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// NOTE(review): two consecutive si64 -> si64 casts; no-ops as typed, left as exported.
%9186 = torch.operator "onnx.Cast"(%9185) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9187 = torch.operator "onnx.Cast"(%9186) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// First pieces of a reshape target shape (assembled by a Concat further down the file).
%9188 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13730_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9189 = torch.operator "onnx.Unsqueeze"(%9174, %9188) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%9190 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9191 =
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9192 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13734_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9193 = torch.operator "onnx.Unsqueeze"(%9187, %9192) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9194 = torch.operator "onnx.Concat"(%9189, %9190, %9191, %9193) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9195 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13737_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9196 = torch.operator "onnx.Unsqueeze"(%9174, %9195) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9197 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9198 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9199 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13741_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9200 = torch.operator "onnx.Unsqueeze"(%9187, %9199) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9201 = torch.operator "onnx.Concat"(%9196, %9197, %9198, %9200) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9202 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13744_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9203 = torch.operator "onnx.Unsqueeze"(%9174, %9202) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9204 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9205 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9206 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13748_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9207 = torch.operator "onnx.Unsqueeze"(%9187, %9206) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9208 = torch.operator "onnx.Concat"(%9203, %9204, %9205, %9207) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9209 = torch.operator "onnx.Reshape"(%9176, %9194) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9210 = torch.operator "onnx.Transpose"(%9209) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9211 = torch.operator "onnx.Reshape"(%9178, %9201) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9212 = torch.operator "onnx.Transpose"(%9211) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9213 = torch.operator "onnx.Reshape"(%9180, %9208) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9214 = 
torch.operator "onnx.Transpose"(%9213) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// ---- single_transformer_blocks.0 attention: norm_q on %9210 ----
// Computed in f32: x * (c2 / sqrt(mean(x ^ c0, last axis) + c1)), cast to bf16, then * %398.
// NOTE(review): has the shape of an RMS norm (c0 = 2, c2 = 1 presumed); constants live in resource blobs - confirm.
// Rank-0 constants carry an explicit element type (tensor<f32>) matching their
// !torch.vtensor<[],f32> result; a bare `tensor` is not a valid builtin type.
%9215 = torch.operator "onnx.Cast"(%9210) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%9216 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%9217 = torch.operator "onnx.Pow"(%9215, %9216) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%9218 = torch.operator "onnx.ReduceMean"(%9217) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%9219 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%9220 = torch.operator "onnx.Add"(%9218, %9219) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%9221 = torch.operator "onnx.Sqrt"(%9220) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%9222 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%9223 = torch.operator "onnx.Div"(%9222, %9221) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// NOTE(review): %9224 repeats the cast already done by %9215; left as exported.
%9224 = torch.operator "onnx.Cast"(%9210) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%9225 = torch.operator "onnx.Mul"(%9224, %9223) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%9226 = torch.operator "onnx.Cast"(%9225) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%9227 = torch.operator "onnx.Mul"(%9226, %398) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// ---- norm_k on %9212: same op sequence, scaled by %399 ----
%9228 = torch.operator "onnx.Cast"(%9212) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%9229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%9230 = torch.operator "onnx.Pow"(%9228, %9229) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%9231 = torch.operator "onnx.ReduceMean"(%9230) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%9232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%9233 = torch.operator "onnx.Add"(%9231, %9232) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%9234 = torch.operator "onnx.Sqrt"(%9233) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%9235 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%9236 = torch.operator "onnx.Div"(%9235, %9234) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// NOTE(review): %9237 repeats the cast already done by %9228; left as exported.
%9237 = torch.operator "onnx.Cast"(%9212) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%9238 = torch.operator "onnx.Mul"(%9237, %9236) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%9239 = torch.operator "onnx.Cast"(%9238) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%9240 = torch.operator "onnx.Mul"(%9239, %399) :
(!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %9241 = torch.operator "onnx.Shape"(%9227) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9242 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9243 = torch.operator "onnx.Gather"(%9241, %9242) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9244 = torch.operator "onnx.Shape"(%9227) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9245 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9246 = torch.operator "onnx.Gather"(%9244, %9245) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9247 = torch.operator "onnx.Shape"(%9227) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9248 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9249 = torch.operator "onnx.Gather"(%9247, %9248) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9250 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13792_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9251 = torch.operator "onnx.Unsqueeze"(%9243, %9250) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9252 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13794_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9253 = torch.operator "onnx.Unsqueeze"(%9246, %9252) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %9254 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13796_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9255 = torch.operator "onnx.Unsqueeze"(%9249, %9254) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9256 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9257 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9258 = torch.operator "onnx.Concat"(%9251, %9253, %9255, %9256, %9257) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %9259 = torch.operator "onnx.Reshape"(%9227, %9258) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9260 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %9261:2 = torch.operator "onnx.Split"(%9259, %9260) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %9262 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9263 = torch.operator "onnx.Squeeze"(%9261#0, %9262) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9264 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.0_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9265 = torch.operator "onnx.Squeeze"(%9261#1, %9264) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9266 = torch.operator "onnx.Neg"(%9265) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9267 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9268 = torch.operator "onnx.Unsqueeze"(%9266, %9267) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9269 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9270 = torch.operator "onnx.Unsqueeze"(%9263, %9269) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9271 = torch.operator "onnx.Concat"(%9268, %9270) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9272 = torch.operator "onnx.Shape"(%9271) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %9273 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9274 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9275 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9276 = torch.operator "onnx.Slice"(%9272, %9274, %9275, %9273) : 
(!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
// ---- single_transformer_blocks.0 attn: combine query with its pair-rotated copy ----
// %9278 = concat(%9276, %9277) is the rank-4 reshape target for the pair-rotated tensor %9271.
// Result: %9285 = bf16( f32(%9227) * %1455 + f32(%9279) * %1456 ).
// %1455 / %1456 are [1,1,4608,128] f32 tables defined elsewhere
// (presumably rotary cos/sin -- confirm at their definitions).
%9277 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9278 = torch.operator "onnx.Concat"(%9276, %9277) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%9279 = torch.operator "onnx.Reshape"(%9271, %9278) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%9280 = torch.operator "onnx.Cast"(%9227) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%9281 = torch.operator "onnx.Mul"(%9280, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%9282 = torch.operator "onnx.Cast"(%9279) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%9283 = torch.operator "onnx.Mul"(%9282, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%9284 = torch.operator "onnx.Add"(%9281, %9283) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%9285 = torch.operator "onnx.Cast"(%9284) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// ---- shape bookkeeping on the normalized key %9240 ----
// Three scalar entries are gathered from shape(%9240) using rank-0 si64 index constants
// (index values are in the resource blob), then unsqueezed to [1]-shaped tensors.
// Fix: the index constants are typed `tensor<si64>` to match `!torch.vtensor<[],si64>`;
// a bare `tensor` is not a valid builtin tensor type.
%9286 = torch.operator "onnx.Shape"(%9240) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%9287 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9288 = torch.operator "onnx.Gather"(%9286, %9287) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9289 = torch.operator "onnx.Shape"(%9240) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%9290 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9291 = torch.operator "onnx.Gather"(%9289, %9290) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9292 = torch.operator "onnx.Shape"(%9240) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%9293 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9294 = torch.operator "onnx.Gather"(%9292, %9293) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9295 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13837_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9296 = torch.operator "onnx.Unsqueeze"(%9288, %9295) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%9297 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13839_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9298 = torch.operator "onnx.Unsqueeze"(%9291, %9297) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%9299 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13841_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9300 = torch.operator "onnx.Unsqueeze"(%9294, %9299) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%9301 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9302 =
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9303 = torch.operator "onnx.Concat"(%9296, %9298, %9300, %9301, %9302) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %9304 = torch.operator "onnx.Reshape"(%9240, %9303) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9305 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %9306:2 = torch.operator "onnx.Split"(%9304, %9305) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %9307 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9308 = torch.operator "onnx.Squeeze"(%9306#0, %9307) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9309 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9310 = torch.operator "onnx.Squeeze"(%9306#1, %9309) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9311 = torch.operator "onnx.Neg"(%9310) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9312 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %9313 = torch.operator "onnx.Unsqueeze"(%9311, %9312) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9314 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9315 = torch.operator "onnx.Unsqueeze"(%9308, %9314) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9316 = torch.operator "onnx.Concat"(%9313, %9315) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9317 = torch.operator "onnx.Shape"(%9316) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %9318 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9319 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9320 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9321 = torch.operator "onnx.Slice"(%9317, %9319, %9320, %9318) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %9322 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9323 = torch.operator "onnx.Concat"(%9321, %9322) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9324 = torch.operator "onnx.Reshape"(%9316, %9323) {torch.onnx.allowzero = 0 : 
si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9325 = torch.operator "onnx.Cast"(%9240) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %9326 = torch.operator "onnx.Mul"(%9325, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9327 = torch.operator "onnx.Cast"(%9324) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9328 = torch.operator "onnx.Mul"(%9327, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9329 = torch.operator "onnx.Add"(%9326, %9328) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9330 = torch.operator "onnx.Cast"(%9329) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %9331 = torch.operator "onnx.Shape"(%9285) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %9332 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9333 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9334 = torch.operator "onnx.Slice"(%9331, %9332, %9333) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9335 = torch.operator "onnx.Cast"(%9334) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %9336 = torch.operator "onnx.Sqrt"(%9335) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9337 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.0_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %9338 = torch.operator "onnx.Cast"(%9336) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %9339 = torch.operator "onnx.Div"(%9337, %9338) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %9340 = torch.operator "onnx.Cast"(%9339) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %9341 = torch.operator "onnx.Transpose"(%9330) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %9342 = torch.operator "onnx.Sqrt"(%9340) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9343 = torch.operator "onnx.Mul"(%9285, %9342) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %9344 = torch.operator "onnx.Sqrt"(%9340) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9345 = torch.operator "onnx.Mul"(%9341, %9344) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %9346 = torch.operator "onnx.MatMul"(%9343, %9345) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %9347 = torch.operator "onnx.Softmax"(%9346) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %9348 = torch.operator "onnx.MatMul"(%9347, %9214) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %9349 = torch.operator "onnx.Transpose"(%9348) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %9350 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_40_attr__value> : 
tensor} : () -> !torch.vtensor<[],si64> %9351 = torch.operator "onnx.Mul"(%9187, %9350) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9352 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13894_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9353 = torch.operator "onnx.Unsqueeze"(%9174, %9352) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9354 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9355 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13897_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9356 = torch.operator "onnx.Unsqueeze"(%9351, %9355) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9357 = torch.operator "onnx.Concat"(%9353, %9354, %9356) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %9358 = torch.operator "onnx.Reshape"(%9349, %9357) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %9359 = torch.operator "onnx.Cast"(%9358) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %9360 = torch.operator "onnx.Concat"(%9359, %9171) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %9361 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.0_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9362 = torch.operator "onnx.Unsqueeze"(%9145, %9361) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9363 = torch.operator 
"onnx.MatMul"(%9360, %973) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9364 = torch.operator "onnx.Add"(%397, %9363) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9365 = torch.operator "onnx.Mul"(%9362, %9364) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9366 = torch.operator "onnx.Add"(%9127, %9365) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9367 = torch.operator "onnx.Gemm"(%1285, %403, %404) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %9368 = torch.operator "onnx.Shape"(%9367) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %9369 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9370 = torch.operator "onnx.Gather"(%9368, %9369) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9371 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9372 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9373 = torch.operator "onnx.Add"(%9370, %9372) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9374 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_3_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9375 = torch.operator "onnx.Div"(%9373, %9374) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9376 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9377 = torch.operator "onnx.Mul"(%9375, %9376) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9378 = torch.operator "onnx.Slice"(%9367, %9371, %9377, %9369) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %9379 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9380 = torch.operator "onnx.Mul"(%9375, %9379) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9381 = torch.operator "onnx.Slice"(%9367, %9377, %9380, %9369) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %9382 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9383 = torch.operator "onnx.Mul"(%9375, %9382) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9384 = torch.operator "onnx.Slice"(%9367, %9380, %9383, %9369) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.12Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.1/norm/norm/Constant_attr__value" : tensor<3072xbf16> %9385 = torch_c.from_builtin_tensor 
%_2Fsingle_transformer_blocks.12Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.12Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.1/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %9386 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.12Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %9387 = torch.operator "onnx.LayerNormalization"(%9366, %9385, %9386) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9388 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9389 = torch.operator "onnx.Unsqueeze"(%9381, %9388) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9390 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %9391 = torch.operator "onnx.Add"(%9389, %9390) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %9392 = torch.operator "onnx.Mul"(%9387, %9391) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9393 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9394 = torch.operator "onnx.Unsqueeze"(%9378, %9393) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9395 = torch.operator "onnx.Add"(%9392, %9394) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16>
// ---- single_transformer_blocks.1 MLP branch (names taken from the dense_resource keys) ----
// x = %405 + MatMul(%9395, %974)                       -> %9397, [?,4608,12288] bf16
// %9410 = c3 * x * (c2 + tanh(c1 * (x + c0 * x*x*x)))
// This has the shape of the tanh GELU approximation; c0..c3 are rank-0 bf16 constants whose
// values live in the resource blob (TODO confirm they are the usual GELU coefficients).
// Fix: the scalar constant attributes are typed `tensor<bf16>` to match the
// `!torch.vtensor<[],bf16>` results; a bare `tensor` is not a valid builtin tensor type.
%9396 = torch.operator "onnx.MatMul"(%9395, %974) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9397 = torch.operator "onnx.Add"(%405, %9396) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9398 = torch.operator "onnx.Mul"(%9397, %9397) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9399 = torch.operator "onnx.Mul"(%9397, %9398) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9400 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9401 = torch.operator "onnx.Mul"(%9400, %9399) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9402 = torch.operator "onnx.Add"(%9397, %9401) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9403 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9404 = torch.operator "onnx.Mul"(%9403, %9402) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9405 = torch.operator "onnx.Tanh"(%9404) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9406 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9407 = torch.operator "onnx.Add"(%9406, %9405) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9408 = torch.operator "onnx.Mul"(%9397, %9407) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%9409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%9410 = torch.operator "onnx.Mul"(%9409, %9408) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// ---- single_transformer_blocks.1 attention: projections of %9395 ----
// %9413 is one scalar entry of shape(%9395), selected by a rank-0 si64 index constant.
// Three [3072,3072] MatMuls of %9395 (%975, %976, %977) each get a [3072] bias (%409, %410, %411).
// Fix: the rank-0 index/divisor constants are typed `tensor<si64>` to match `!torch.vtensor<[],si64>`.
%9411 = torch.operator "onnx.Shape"(%9395) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%9412 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9413 = torch.operator "onnx.Gather"(%9411, %9412) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9414 = torch.operator "onnx.MatMul"(%9395, %975) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9415 = torch.operator "onnx.Add"(%409, %9414) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9416 = torch.operator "onnx.MatMul"(%9395, %976) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9417 = torch.operator "onnx.Add"(%410, %9416) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9418 = torch.operator "onnx.MatMul"(%9395, %977) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%9419 = torch.operator "onnx.Add"(%411, %9418) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// ---- reshape-target arithmetic ----
// %9424 = shape(%9417)[idx] / divisor (both si64 scalars from the resource blob).
// The two Cast ops take si64 to si64 and are kept exactly as exported.
// %9413 and %9426 are unsqueezed to [1] and concatenated with two [1] constants into a [4] si64 vector.
%9420 = torch.operator "onnx.Shape"(%9417) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%9421 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9422 = torch.operator "onnx.Gather"(%9420, %9421) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9423 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%9424 = torch.operator "onnx.Div"(%9422, %9423) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9425 = torch.operator "onnx.Cast"(%9424) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9426 = torch.operator "onnx.Cast"(%9425) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%9427 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13969_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9428 = torch.operator "onnx.Unsqueeze"(%9413, %9427) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%9429 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9430 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9431 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13973_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%9432 = torch.operator "onnx.Unsqueeze"(%9426, %9431) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%9433 = torch.operator "onnx.Concat"(%9428, %9429, %9430, %9432) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>,
!torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9434 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13976_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9435 = torch.operator "onnx.Unsqueeze"(%9413, %9434) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9436 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9437 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9438 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13980_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9439 = torch.operator "onnx.Unsqueeze"(%9426, %9438) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9440 = torch.operator "onnx.Concat"(%9435, %9436, %9437, %9439) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9441 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_13983_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9442 = torch.operator "onnx.Unsqueeze"(%9413, %9441) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9443 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9444 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9445 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_13987_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9446 = torch.operator "onnx.Unsqueeze"(%9426, %9445) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9447 = torch.operator "onnx.Concat"(%9442, %9443, %9444, %9446) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9448 = torch.operator "onnx.Reshape"(%9415, %9433) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9449 = torch.operator "onnx.Transpose"(%9448) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9450 = torch.operator "onnx.Reshape"(%9417, %9440) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9451 = torch.operator "onnx.Transpose"(%9450) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9452 = torch.operator "onnx.Reshape"(%9419, %9447) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9453 = torch.operator "onnx.Transpose"(%9452) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9454 = torch.operator "onnx.Cast"(%9449) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9455 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9456 = torch.operator "onnx.Pow"(%9454, %9455) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> 
!torch.vtensor<[?,?,?,?],f32> %9457 = torch.operator "onnx.ReduceMean"(%9456) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9458 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9459 = torch.operator "onnx.Add"(%9457, %9458) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9460 = torch.operator "onnx.Sqrt"(%9459) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9461 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9462 = torch.operator "onnx.Div"(%9461, %9460) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9463 = torch.operator "onnx.Cast"(%9449) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9464 = torch.operator "onnx.Mul"(%9463, %9462) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %9465 = torch.operator "onnx.Cast"(%9464) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %9466 = torch.operator "onnx.Mul"(%9465, %407) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %9467 = torch.operator "onnx.Cast"(%9451) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9469 = torch.operator "onnx.Pow"(%9467, %9468) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %9470 = 
torch.operator "onnx.ReduceMean"(%9469) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9471 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9472 = torch.operator "onnx.Add"(%9470, %9471) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9473 = torch.operator "onnx.Sqrt"(%9472) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9474 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9475 = torch.operator "onnx.Div"(%9474, %9473) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9476 = torch.operator "onnx.Cast"(%9451) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9477 = torch.operator "onnx.Mul"(%9476, %9475) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %9478 = torch.operator "onnx.Cast"(%9477) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %9479 = torch.operator "onnx.Mul"(%9478, %408) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %9480 = torch.operator "onnx.Shape"(%9466) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9481 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9482 = torch.operator "onnx.Gather"(%9480, %9481) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9483 = torch.operator "onnx.Shape"(%9466) : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9484 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9485 = torch.operator "onnx.Gather"(%9483, %9484) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9486 = torch.operator "onnx.Shape"(%9466) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9488 = torch.operator "onnx.Gather"(%9486, %9487) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9489 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14031_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9490 = torch.operator "onnx.Unsqueeze"(%9482, %9489) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9491 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14033_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9492 = torch.operator "onnx.Unsqueeze"(%9485, %9491) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9493 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14035_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9494 = torch.operator "onnx.Unsqueeze"(%9488, %9493) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9495 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9496 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.1_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9497 = torch.operator "onnx.Concat"(%9490, %9492, %9494, %9495, %9496) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %9498 = torch.operator "onnx.Reshape"(%9466, %9497) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9499 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %9500:2 = torch.operator "onnx.Split"(%9498, %9499) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %9501 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9502 = torch.operator "onnx.Squeeze"(%9500#0, %9501) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9503 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9504 = torch.operator "onnx.Squeeze"(%9500#1, %9503) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9505 = torch.operator "onnx.Neg"(%9504) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9506 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9507 = torch.operator "onnx.Unsqueeze"(%9505, 
%9506) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9508 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9509 = torch.operator "onnx.Unsqueeze"(%9502, %9508) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9510 = torch.operator "onnx.Concat"(%9507, %9509) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9511 = torch.operator "onnx.Shape"(%9510) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %9512 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9513 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9514 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9515 = torch.operator "onnx.Slice"(%9511, %9513, %9514, %9512) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %9516 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9517 = torch.operator "onnx.Concat"(%9515, %9516) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9518 = torch.operator "onnx.Reshape"(%9510, %9517) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> 
!torch.vtensor<[?,?,?,?],bf16> %9519 = torch.operator "onnx.Cast"(%9466) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %9520 = torch.operator "onnx.Mul"(%9519, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9521 = torch.operator "onnx.Cast"(%9518) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9522 = torch.operator "onnx.Mul"(%9521, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9523 = torch.operator "onnx.Add"(%9520, %9522) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9524 = torch.operator "onnx.Cast"(%9523) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %9525 = torch.operator "onnx.Shape"(%9479) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9526 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9527 = torch.operator "onnx.Gather"(%9525, %9526) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9528 = torch.operator "onnx.Shape"(%9479) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9529 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9530 = torch.operator "onnx.Gather"(%9528, %9529) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9531 = torch.operator "onnx.Shape"(%9479) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9532 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.1_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9533 = torch.operator "onnx.Gather"(%9531, %9532) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9534 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14076_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9535 = torch.operator "onnx.Unsqueeze"(%9527, %9534) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9536 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14078_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9537 = torch.operator "onnx.Unsqueeze"(%9530, %9536) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9538 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14080_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9539 = torch.operator "onnx.Unsqueeze"(%9533, %9538) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9540 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9541 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9542 = torch.operator "onnx.Concat"(%9535, %9537, %9539, %9540, %9541) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %9543 = torch.operator "onnx.Reshape"(%9479, %9542) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9544 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %9545:2 = torch.operator "onnx.Split"(%9543, %9544) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %9546 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9547 = torch.operator "onnx.Squeeze"(%9545#0, %9546) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9548 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9549 = torch.operator "onnx.Squeeze"(%9545#1, %9548) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9550 = torch.operator "onnx.Neg"(%9549) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9551 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9552 = torch.operator "onnx.Unsqueeze"(%9550, %9551) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9553 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9554 = torch.operator "onnx.Unsqueeze"(%9547, %9553) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9555 = torch.operator "onnx.Concat"(%9552, %9554) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> 
!torch.vtensor<[?,?,?,?,2],bf16> %9556 = torch.operator "onnx.Shape"(%9555) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %9557 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9558 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9559 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9560 = torch.operator "onnx.Slice"(%9556, %9558, %9559, %9557) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %9561 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9562 = torch.operator "onnx.Concat"(%9560, %9561) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9563 = torch.operator "onnx.Reshape"(%9555, %9562) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9564 = torch.operator "onnx.Cast"(%9479) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %9565 = torch.operator "onnx.Mul"(%9564, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9566 = torch.operator "onnx.Cast"(%9563) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9567 = torch.operator "onnx.Mul"(%9566, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> 
!torch.vtensor<[?,?,4608,128],f32> %9568 = torch.operator "onnx.Add"(%9565, %9567) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9569 = torch.operator "onnx.Cast"(%9568) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %9570 = torch.operator "onnx.Shape"(%9524) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %9571 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9572 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9573 = torch.operator "onnx.Slice"(%9570, %9571, %9572) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9574 = torch.operator "onnx.Cast"(%9573) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %9575 = torch.operator "onnx.Sqrt"(%9574) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9576 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %9577 = torch.operator "onnx.Cast"(%9575) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %9578 = torch.operator "onnx.Div"(%9576, %9577) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %9579 = torch.operator "onnx.Cast"(%9578) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %9580 = torch.operator "onnx.Transpose"(%9569) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %9581 = torch.operator 
"onnx.Sqrt"(%9579) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9582 = torch.operator "onnx.Mul"(%9524, %9581) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %9583 = torch.operator "onnx.Sqrt"(%9579) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9584 = torch.operator "onnx.Mul"(%9580, %9583) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %9585 = torch.operator "onnx.MatMul"(%9582, %9584) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %9586 = torch.operator "onnx.Softmax"(%9585) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %9587 = torch.operator "onnx.MatMul"(%9586, %9453) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %9588 = torch.operator "onnx.Transpose"(%9587) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %9589 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9590 = torch.operator "onnx.Mul"(%9426, %9589) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9591 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14133_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9592 = torch.operator "onnx.Unsqueeze"(%9413, %9591) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9593 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9594 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_14136_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9595 = torch.operator "onnx.Unsqueeze"(%9590, %9594) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9596 = torch.operator "onnx.Concat"(%9592, %9593, %9595) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %9597 = torch.operator "onnx.Reshape"(%9588, %9596) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %9598 = torch.operator "onnx.Cast"(%9597) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %9599 = torch.operator "onnx.Concat"(%9598, %9410) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %9600 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.1_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9601 = torch.operator "onnx.Unsqueeze"(%9384, %9600) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9602 = torch.operator "onnx.MatMul"(%9599, %978) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9603 = torch.operator "onnx.Add"(%406, %9602) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9604 = torch.operator "onnx.Mul"(%9601, %9603) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9605 = torch.operator "onnx.Add"(%9366, %9604) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9606 = torch.operator "onnx.Gemm"(%1285, %412, %413) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 
1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %9607 = torch.operator "onnx.Shape"(%9606) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %9608 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9609 = torch.operator "onnx.Gather"(%9607, %9608) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9610 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9611 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9612 = torch.operator "onnx.Add"(%9609, %9611) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9614 = torch.operator "onnx.Div"(%9612, %9613) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9615 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9616 = torch.operator "onnx.Mul"(%9614, %9615) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9617 = torch.operator "onnx.Slice"(%9606, %9610, %9616, %9608) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %9618 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9619 = torch.operator "onnx.Mul"(%9614, %9618) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9620 = torch.operator "onnx.Slice"(%9606, %9616, %9619, %9608) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %9621 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9622 = torch.operator "onnx.Mul"(%9614, %9621) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9623 = torch.operator "onnx.Slice"(%9606, %9619, %9622, %9608) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.22Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.2/norm/norm/Constant_attr__value" : tensor<3072xbf16> %9624 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.22Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.22Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.2/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %9625 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.22Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %9626 = torch.operator "onnx.LayerNormalization"(%9605, %9624, %9625) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9627 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    // (The line above is the attribute/type tail of `%9627 = torch.operator "onnx.Constant"()`.)
    //
    // ---- single_transformer_blocks.2: modulate the LayerNorm output --------------------------
    // %9634 = %9626 * (unsqueeze(%9620) + c8) + unsqueeze(%9617).
    // Both unsqueezes turn [?,?] into [?,1,?], so the modulation vectors broadcast over the
    // 4608-long sequence axis. The value of c8 lives in a dense_resource blob that is not
    // visible here (presumably 1.0 -- TODO confirm).
    // Rank-0 constants carry a `tensor<elt>` attribute type whose element type matches their
    // `!torch.vtensor<[],elt>` result type.
    %9628 = torch.operator "onnx.Unsqueeze"(%9620, %9627) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %9629 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %9630 = torch.operator "onnx.Add"(%9628, %9629) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
    %9631 = torch.operator "onnx.Mul"(%9626, %9630) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
    %9632 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %9633 = torch.operator "onnx.Unsqueeze"(%9617, %9632) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
    %9634 = torch.operator "onnx.Add"(%9631, %9633) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

    // ---- MLP branch: 3072 -> 12288 linear, then an elementwise tanh-based activation ---------
    // h      = %9634 @ %979 + %414
    // %9649  = k3 * h * (k2 + tanh(k1 * (h + k0 * h^3)))
    // k0..k3 are the act_mlp constants below; their values are not visible in this file.
    // NOTE(review): the op pattern matches the tanh approximation of GELU -- confirm against
    // the exporting model before relying on it.
    %9635 = torch.operator "onnx.MatMul"(%9634, %979) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9636 = torch.operator "onnx.Add"(%414, %9635) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    // h^2 and h^3 built from two multiplies.
    %9637 = torch.operator "onnx.Mul"(%9636, %9636) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9638 = torch.operator "onnx.Mul"(%9636, %9637) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9639 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %9640 = torch.operator "onnx.Mul"(%9639, %9638) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9641 = torch.operator "onnx.Add"(%9636, %9640) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9642 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %9643 = torch.operator "onnx.Mul"(%9642, %9641) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9644 = torch.operator "onnx.Tanh"(%9643) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9645 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %9646 = torch.operator "onnx.Add"(%9645, %9644) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9647 = torch.operator "onnx.Mul"(%9636, %9646) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
    %9648 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
    %9649 = torch.operator "onnx.Mul"(%9648, %9647) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>

    // ---- Attention branch: one scalar of the input shape, then three 3072 -> 3072 linears ----
    // %9652 is one element of shape(%9634); which index is selected is stored in the
    // attn_Constant blob (presumably 0, the dynamic leading dim -- TODO confirm).
    %9650 = torch.operator "onnx.Shape"(%9634) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
    %9651 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9652 = torch.operator "onnx.Gather"(%9650, %9651) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // %9654 later feeds the norm_q path, %9656 the norm_k path, and %9658 is the operand that
    // is multiplied with the softmax output further down in this function.
    %9653 = torch.operator "onnx.MatMul"(%9634, %980) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
    %9654 = torch.operator "onnx.Add"(%418, %9653) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
    %9655 = torch.operator "onnx.MatMul"(%9634, %981) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
    %9656 = torch.operator "onnx.Add"(%419, %9655) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
    %9657 = torch.operator "onnx.MatMul"(%9634, %982) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
    %9658 = torch.operator "onnx.Add"(%420, %9657) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

    // Scalar used as the last extent of the 4-D reshape targets built later:
    // shape(%9656)[i] / d, with i and d stored in blobs (presumably 3072 / num_heads --
    // TODO confirm). The Cast to = 7 is int64 -> int64 according to its own types.
    %9659 = torch.operator "onnx.Shape"(%9656) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
    %9660 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9661 = torch.operator "onnx.Gather"(%9659, %9660) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %9662 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9663 = torch.operator "onnx.Div"(%9661, %9662) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %9664 = torch.operator "onnx.Cast"(%9663) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    // The defining op of %9665 continues on the following line of the file.
    %9665 =
torch.operator "onnx.Cast"(%9664) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14208_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9667 = torch.operator "onnx.Unsqueeze"(%9652, %9666) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9668 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9669 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9670 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14212_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9671 = torch.operator "onnx.Unsqueeze"(%9665, %9670) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9672 = torch.operator "onnx.Concat"(%9667, %9668, %9669, %9671) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9673 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14215_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9674 = torch.operator "onnx.Unsqueeze"(%9652, %9673) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9675 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9676 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9677 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14219_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9678 = torch.operator "onnx.Unsqueeze"(%9665, %9677) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9679 = torch.operator "onnx.Concat"(%9674, %9675, %9676, %9678) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9680 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14222_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9681 = torch.operator "onnx.Unsqueeze"(%9652, %9680) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9682 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9683 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9684 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14226_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9685 = torch.operator "onnx.Unsqueeze"(%9665, %9684) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9686 = torch.operator "onnx.Concat"(%9681, %9682, %9683, %9685) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9687 = torch.operator "onnx.Reshape"(%9654, %9672) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9688 = torch.operator "onnx.Transpose"(%9687) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
    // (The line above is the type tail of `%9688 = torch.operator "onnx.Transpose"(%9687)`.)
    //
    // ---- Split the remaining two projections into 4-D and swap axes 1 and 2 ------------------
    // The reshape targets %9679 / %9686 are built earlier in this function.
    %9689 = torch.operator "onnx.Reshape"(%9656, %9679) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
    %9690 = torch.operator "onnx.Transpose"(%9689) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
    %9691 = torch.operator "onnx.Reshape"(%9658, %9686) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
    %9692 = torch.operator "onnx.Transpose"(%9691) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>

    // ---- norm_q: normalise %9688 over its last axis in f32, then scale by %416 ---------------
    // %9705 = bf16( f32(x) * (c2 / sqrt(mean(pow(f32(x), c0), axis=-1, keepdims) + c1)) ) * %416
    // c0, c1, c2 are the norm_q constants; their values are not visible in this file.
    // NOTE(review): presumably c0 = 2, c1 = eps, c2 = 1, i.e. an RMS norm -- confirm.
    // Rank-0 constants carry a `tensor<elt>` attribute type whose element type matches their
    // `!torch.vtensor<[],elt>` result type.
    %9693 = torch.operator "onnx.Cast"(%9688) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %9694 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %9695 = torch.operator "onnx.Pow"(%9693, %9694) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %9696 = torch.operator "onnx.ReduceMean"(%9695) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %9697 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %9698 = torch.operator "onnx.Add"(%9696, %9697) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %9699 = torch.operator "onnx.Sqrt"(%9698) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %9700 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %9701 = torch.operator "onnx.Div"(%9700, %9699) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    // Second f32 cast of the same input, as emitted by the exporter.
    %9702 = torch.operator "onnx.Cast"(%9688) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %9703 = torch.operator "onnx.Mul"(%9702, %9701) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %9704 = torch.operator "onnx.Cast"(%9703) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
    // Broadcasting against the [128] weight pins the last extent to 128 in the result type.
    %9705 = torch.operator "onnx.Mul"(%9704, %416) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>

    // ---- norm_k: same op sequence applied to %9690, scaled by %417 ---------------------------
    %9706 = torch.operator "onnx.Cast"(%9690) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %9707 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %9708 = torch.operator "onnx.Pow"(%9706, %9707) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %9709 = torch.operator "onnx.ReduceMean"(%9708) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %9710 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %9711 = torch.operator "onnx.Add"(%9709, %9710) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %9712 = torch.operator "onnx.Sqrt"(%9711) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %9713 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
    %9714 = torch.operator "onnx.Div"(%9713, %9712) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
    %9715 = torch.operator "onnx.Cast"(%9690) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
    %9716 = torch.operator "onnx.Mul"(%9715, %9714) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
    %9717 = torch.operator "onnx.Cast"(%9716) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
    %9718 = torch.operator "onnx.Mul"(%9717, %417) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>

    // ---- Three scalar extents of %9705, used to build a 5-D reshape target further down ------
    // The gathered indices are stored in blobs (presumably 0, 1, 2 -- TODO confirm).
    %9719 = torch.operator "onnx.Shape"(%9705) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %9720 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9721 = torch.operator "onnx.Gather"(%9719, %9720) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %9722 = torch.operator "onnx.Shape"(%9705) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %9723 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9724 = torch.operator "onnx.Gather"(%9722, %9723) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %9725 = torch.operator "onnx.Shape"(%9705) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %9726 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    // The type signature of %9727 continues on the following line of the file.
    %9727 = torch.operator "onnx.Gather"(%9725, %9726) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>,
!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9728 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14270_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9729 = torch.operator "onnx.Unsqueeze"(%9721, %9728) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9730 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14272_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9731 = torch.operator "onnx.Unsqueeze"(%9724, %9730) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9732 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14274_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9733 = torch.operator "onnx.Unsqueeze"(%9727, %9732) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9734 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9735 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9736 = torch.operator "onnx.Concat"(%9729, %9731, %9733, %9734, %9735) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %9737 = torch.operator "onnx.Reshape"(%9705, %9736) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9738 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %9739:2 = torch.operator "onnx.Split"(%9737, %9738) 
{torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %9740 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9741 = torch.operator "onnx.Squeeze"(%9739#0, %9740) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9742 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9743 = torch.operator "onnx.Squeeze"(%9739#1, %9742) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9744 = torch.operator "onnx.Neg"(%9743) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9745 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9746 = torch.operator "onnx.Unsqueeze"(%9744, %9745) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9747 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9748 = torch.operator "onnx.Unsqueeze"(%9741, %9747) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9749 = torch.operator "onnx.Concat"(%9746, %9748) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9750 = torch.operator "onnx.Shape"(%9749) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %9751 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.2_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9752 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9753 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9754 = torch.operator "onnx.Slice"(%9750, %9752, %9753, %9751) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %9755 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9756 = torch.operator "onnx.Concat"(%9754, %9755) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9757 = torch.operator "onnx.Reshape"(%9749, %9756) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9758 = torch.operator "onnx.Cast"(%9705) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %9759 = torch.operator "onnx.Mul"(%9758, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9760 = torch.operator "onnx.Cast"(%9757) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9761 = torch.operator "onnx.Mul"(%9760, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9762 = torch.operator "onnx.Add"(%9759, %9761) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9763 = 
torch.operator "onnx.Cast"(%9762) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
    // (The line above completes `%9763 = ...`: the f32 sum %9762 cast back to bf16.)
    //
    // ---- Three scalar extents of %9718, used to build a 5-D reshape target further down ------
    // The gathered indices are stored in dense_resource blobs not visible here
    // (presumably 0, 1, 2 -- TODO confirm).
    // Rank-0 constants carry a `tensor<si64>` attribute type matching their
    // `!torch.vtensor<[],si64>` result type.
    %9764 = torch.operator "onnx.Shape"(%9718) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %9765 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9766 = torch.operator "onnx.Gather"(%9764, %9765) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %9767 = torch.operator "onnx.Shape"(%9718) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %9768 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9769 = torch.operator "onnx.Gather"(%9767, %9768) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %9770 = torch.operator "onnx.Shape"(%9718) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
    %9771 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9772 = torch.operator "onnx.Gather"(%9770, %9771) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

    // Promote each scalar extent to a 1-element vector so they can be concatenated.
    %9773 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14315_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %9774 = torch.operator "onnx.Unsqueeze"(%9766, %9773) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %9775 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14317_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    // The type signature of %9776 continues on the following line of the file.
    %9776 = torch.operator "onnx.Unsqueeze"(%9769, %9775) :
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9777 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14319_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9778 = torch.operator "onnx.Unsqueeze"(%9772, %9777) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9779 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9780 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9781 = torch.operator "onnx.Concat"(%9774, %9776, %9778, %9779, %9780) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %9782 = torch.operator "onnx.Reshape"(%9718, %9781) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %9784:2 = torch.operator "onnx.Split"(%9782, %9783) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %9785 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9786 = torch.operator "onnx.Squeeze"(%9784#0, %9785) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9787 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.2_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9788 = torch.operator "onnx.Squeeze"(%9784#1, %9787) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9789 = torch.operator "onnx.Neg"(%9788) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9790 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9791 = torch.operator "onnx.Unsqueeze"(%9789, %9790) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9792 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9793 = torch.operator "onnx.Unsqueeze"(%9786, %9792) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9794 = torch.operator "onnx.Concat"(%9791, %9793) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9795 = torch.operator "onnx.Shape"(%9794) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %9796 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9797 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9798 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9799 = torch.operator "onnx.Slice"(%9795, %9797, %9798, %9796) : 
(!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %9800 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9801 = torch.operator "onnx.Concat"(%9799, %9800) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9802 = torch.operator "onnx.Reshape"(%9794, %9801) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9803 = torch.operator "onnx.Cast"(%9718) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %9804 = torch.operator "onnx.Mul"(%9803, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9805 = torch.operator "onnx.Cast"(%9802) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9806 = torch.operator "onnx.Mul"(%9805, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9807 = torch.operator "onnx.Add"(%9804, %9806) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9808 = torch.operator "onnx.Cast"(%9807) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %9809 = torch.operator "onnx.Shape"(%9763) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %9810 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9811 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.2_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9812 = torch.operator "onnx.Slice"(%9809, %9810, %9811) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9813 = torch.operator "onnx.Cast"(%9812) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %9814 = torch.operator "onnx.Sqrt"(%9813) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9815 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %9816 = torch.operator "onnx.Cast"(%9814) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %9817 = torch.operator "onnx.Div"(%9815, %9816) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %9818 = torch.operator "onnx.Cast"(%9817) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %9819 = torch.operator "onnx.Transpose"(%9808) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %9820 = torch.operator "onnx.Sqrt"(%9818) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9821 = torch.operator "onnx.Mul"(%9763, %9820) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %9822 = torch.operator "onnx.Sqrt"(%9818) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %9823 = torch.operator "onnx.Mul"(%9819, %9822) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %9824 = torch.operator "onnx.MatMul"(%9821, %9823) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %9825 = torch.operator "onnx.Softmax"(%9824) 
{torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
    // (The line above is the attribute/type tail of `%9825 = torch.operator "onnx.Softmax"(%9824)`.)
    //
    // ---- Apply the softmax weights to %9692, then swap axes 1 and 2 back ---------------------
    %9826 = torch.operator "onnx.MatMul"(%9825, %9692) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
    %9827 = torch.operator "onnx.Transpose"(%9826) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>

    // ---- Build the 3-element reshape target [%9652, c41, %9665 * c40] and collapse to 3-D ----
    // c40 and c41 are stored in dense_resource blobs not visible here.
    // NOTE(review): presumably c40 is the head count and c41 the sequence extent (or -1) --
    // confirm against the exporting model.
    // The rank-0 constant carries a `tensor<si64>` attribute type matching its
    // `!torch.vtensor<[],si64>` result type.
    %9828 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
    %9829 = torch.operator "onnx.Mul"(%9665, %9828) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
    %9830 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14372_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %9831 = torch.operator "onnx.Unsqueeze"(%9652, %9830) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %9832 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %9833 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14375_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
    %9834 = torch.operator "onnx.Unsqueeze"(%9829, %9833) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
    %9835 = torch.operator "onnx.Concat"(%9831, %9832, %9834) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
    %9836 = torch.operator "onnx.Reshape"(%9827, %9835) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
    // The type signature of %9837 continues on the following line of the file.
    %9837 = torch.operator "onnx.Cast"(%9836) {torch.onnx.to = 16 : si64} :
(!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %9838 = torch.operator "onnx.Concat"(%9837, %9649) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %9839 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.2_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9840 = torch.operator "onnx.Unsqueeze"(%9623, %9839) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9841 = torch.operator "onnx.MatMul"(%9838, %983) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9842 = torch.operator "onnx.Add"(%415, %9841) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9843 = torch.operator "onnx.Mul"(%9840, %9842) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9844 = torch.operator "onnx.Add"(%9605, %9843) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9845 = torch.operator "onnx.Gemm"(%1285, %421, %422) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %9846 = torch.operator "onnx.Shape"(%9845) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %9847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9848 = torch.operator "onnx.Gather"(%9846, %9847) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9849 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9850 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9851 = torch.operator "onnx.Add"(%9848, %9850) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9852 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9853 = torch.operator "onnx.Div"(%9851, %9852) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9854 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9855 = torch.operator "onnx.Mul"(%9853, %9854) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9856 = torch.operator "onnx.Slice"(%9845, %9849, %9855, %9847) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %9857 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9858 = torch.operator "onnx.Mul"(%9853, %9857) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9859 = torch.operator "onnx.Slice"(%9845, %9855, %9858, %9847) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %9860 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %9861 = torch.operator "onnx.Mul"(%9853, %9860) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9862 = torch.operator "onnx.Slice"(%9845, %9858, %9861, %9847) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.32Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.3/norm/norm/Constant_attr__value" : tensor<3072xbf16> %9863 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.32Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.32Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.3/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %9864 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.32Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %9865 = torch.operator "onnx.LayerNormalization"(%9844, %9863, %9864) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9866 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9867 = torch.operator "onnx.Unsqueeze"(%9859, %9866) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %9869 = torch.operator "onnx.Add"(%9867, %9868) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %9870 = torch.operator "onnx.Mul"(%9865, %9869) : 
(!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9871 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9872 = torch.operator "onnx.Unsqueeze"(%9856, %9871) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %9873 = torch.operator "onnx.Add"(%9870, %9872) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9874 = torch.operator "onnx.MatMul"(%9873, %984) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9875 = torch.operator "onnx.Add"(%423, %9874) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9876 = torch.operator "onnx.Mul"(%9875, %9875) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9877 = torch.operator "onnx.Mul"(%9875, %9876) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9878 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %9879 = torch.operator "onnx.Mul"(%9878, %9877) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9880 = torch.operator "onnx.Add"(%9875, %9879) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9881 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %9882 = torch.operator "onnx.Mul"(%9881, %9880) : (!torch.vtensor<[],bf16>, 
!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9883 = torch.operator "onnx.Tanh"(%9882) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9884 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %9885 = torch.operator "onnx.Add"(%9884, %9883) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9886 = torch.operator "onnx.Mul"(%9875, %9885) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9887 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %9888 = torch.operator "onnx.Mul"(%9887, %9886) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %9889 = torch.operator "onnx.Shape"(%9873) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %9890 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9891 = torch.operator "onnx.Gather"(%9889, %9890) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9892 = torch.operator "onnx.MatMul"(%9873, %985) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9893 = torch.operator "onnx.Add"(%427, %9892) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9894 = torch.operator "onnx.MatMul"(%9873, %986) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9895 = torch.operator "onnx.Add"(%428, %9894) : 
(!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9896 = torch.operator "onnx.MatMul"(%9873, %987) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9897 = torch.operator "onnx.Add"(%429, %9896) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %9898 = torch.operator "onnx.Shape"(%9895) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %9899 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9900 = torch.operator "onnx.Gather"(%9898, %9899) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9901 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9902 = torch.operator "onnx.Div"(%9900, %9901) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9903 = torch.operator "onnx.Cast"(%9902) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9904 = torch.operator "onnx.Cast"(%9903) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9905 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14447_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9906 = torch.operator "onnx.Unsqueeze"(%9891, %9905) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9907 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9908 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.3_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9909 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14451_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9910 = torch.operator "onnx.Unsqueeze"(%9904, %9909) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9911 = torch.operator "onnx.Concat"(%9906, %9907, %9908, %9910) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9912 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14454_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9913 = torch.operator "onnx.Unsqueeze"(%9891, %9912) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9914 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9915 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9916 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14458_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9917 = torch.operator "onnx.Unsqueeze"(%9904, %9916) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9918 = torch.operator "onnx.Concat"(%9913, %9914, %9915, %9917) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9919 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14461_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9920 = 
torch.operator "onnx.Unsqueeze"(%9891, %9919) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9921 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9922 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9923 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14465_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9924 = torch.operator "onnx.Unsqueeze"(%9904, %9923) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9925 = torch.operator "onnx.Concat"(%9920, %9921, %9922, %9924) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9926 = torch.operator "onnx.Reshape"(%9893, %9911) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9927 = torch.operator "onnx.Transpose"(%9926) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9928 = torch.operator "onnx.Reshape"(%9895, %9918) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9929 = torch.operator "onnx.Transpose"(%9928) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9930 = torch.operator "onnx.Reshape"(%9897, %9925) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9931 = torch.operator "onnx.Transpose"(%9930) {torch.onnx.perm = [0 
: si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9932 = torch.operator "onnx.Cast"(%9927) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9933 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9934 = torch.operator "onnx.Pow"(%9932, %9933) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %9935 = torch.operator "onnx.ReduceMean"(%9934) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9936 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9937 = torch.operator "onnx.Add"(%9935, %9936) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9938 = torch.operator "onnx.Sqrt"(%9937) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9939 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9940 = torch.operator "onnx.Div"(%9939, %9938) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9941 = torch.operator "onnx.Cast"(%9927) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9942 = torch.operator "onnx.Mul"(%9941, %9940) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %9943 = torch.operator "onnx.Cast"(%9942) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %9944 = torch.operator "onnx.Mul"(%9943, %425) : (!torch.vtensor<[?,?,?,?],bf16>, 
!torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %9945 = torch.operator "onnx.Cast"(%9929) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9946 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9947 = torch.operator "onnx.Pow"(%9945, %9946) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %9948 = torch.operator "onnx.ReduceMean"(%9947) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9950 = torch.operator "onnx.Add"(%9948, %9949) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9951 = torch.operator "onnx.Sqrt"(%9950) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9952 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %9953 = torch.operator "onnx.Div"(%9952, %9951) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %9954 = torch.operator "onnx.Cast"(%9929) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %9955 = torch.operator "onnx.Mul"(%9954, %9953) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %9956 = torch.operator "onnx.Cast"(%9955) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %9957 = torch.operator "onnx.Mul"(%9956, %426) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> 
!torch.vtensor<[?,?,?,128],bf16> %9958 = torch.operator "onnx.Shape"(%9944) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9959 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9960 = torch.operator "onnx.Gather"(%9958, %9959) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9961 = torch.operator "onnx.Shape"(%9944) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9962 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9963 = torch.operator "onnx.Gather"(%9961, %9962) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9964 = torch.operator "onnx.Shape"(%9944) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %9965 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %9966 = torch.operator "onnx.Gather"(%9964, %9965) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %9967 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14509_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9968 = torch.operator "onnx.Unsqueeze"(%9960, %9967) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9969 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14511_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9970 = torch.operator "onnx.Unsqueeze"(%9963, %9969) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9971 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<_Constant_14513_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9972 = torch.operator "onnx.Unsqueeze"(%9966, %9971) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %9973 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9974 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9975 = torch.operator "onnx.Concat"(%9968, %9970, %9972, %9973, %9974) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %9976 = torch.operator "onnx.Reshape"(%9944, %9975) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9977 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %9978:2 = torch.operator "onnx.Split"(%9976, %9977) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %9979 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9980 = torch.operator "onnx.Squeeze"(%9978#0, %9979) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9981 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %9982 = torch.operator "onnx.Squeeze"(%9978#1, %9981) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9983 = torch.operator "onnx.Neg"(%9982) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %9984 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9985 = torch.operator "onnx.Unsqueeze"(%9983, %9984) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9986 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9987 = torch.operator "onnx.Unsqueeze"(%9980, %9986) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %9988 = torch.operator "onnx.Concat"(%9985, %9987) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %9989 = torch.operator "onnx.Shape"(%9988) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %9990 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9991 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9992 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9993 = torch.operator "onnx.Slice"(%9989, %9991, %9992, %9990) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[3],si64> %9994 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %9995 = torch.operator "onnx.Concat"(%9993, %9994) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %9996 = torch.operator "onnx.Reshape"(%9988, %9995) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %9997 = torch.operator "onnx.Cast"(%9944) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %9998 = torch.operator "onnx.Mul"(%9997, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %9999 = torch.operator "onnx.Cast"(%9996) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10000 = torch.operator "onnx.Mul"(%9999, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10001 = torch.operator "onnx.Add"(%9998, %10000) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10002 = torch.operator "onnx.Cast"(%10001) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %10003 = torch.operator "onnx.Shape"(%9957) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10004 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10005 = torch.operator "onnx.Gather"(%10003, %10004) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10006 = torch.operator "onnx.Shape"(%9957) : (!torch.vtensor<[?,?,?,128],bf16>) -> 
!torch.vtensor<[4],si64> %10007 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10008 = torch.operator "onnx.Gather"(%10006, %10007) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10009 = torch.operator "onnx.Shape"(%9957) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10010 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10011 = torch.operator "onnx.Gather"(%10009, %10010) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10012 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14554_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10013 = torch.operator "onnx.Unsqueeze"(%10005, %10012) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10014 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14556_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10015 = torch.operator "onnx.Unsqueeze"(%10008, %10014) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10016 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14558_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10017 = torch.operator "onnx.Unsqueeze"(%10011, %10016) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10018 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10019 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.3_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10020 = torch.operator "onnx.Concat"(%10013, %10015, %10017, %10018, %10019) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %10021 = torch.operator "onnx.Reshape"(%9957, %10020) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10022 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10023:2 = torch.operator "onnx.Split"(%10021, %10022) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10024 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10025 = torch.operator "onnx.Squeeze"(%10023#0, %10024) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10026 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10027 = torch.operator "onnx.Squeeze"(%10023#1, %10026) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10028 = torch.operator "onnx.Neg"(%10027) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10029 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10030 = torch.operator 
"onnx.Unsqueeze"(%10028, %10029) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10031 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10032 = torch.operator "onnx.Unsqueeze"(%10025, %10031) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10033 = torch.operator "onnx.Concat"(%10030, %10032) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10034 = torch.operator "onnx.Shape"(%10033) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10035 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10036 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10037 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10038 = torch.operator "onnx.Slice"(%10034, %10036, %10037, %10035) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10039 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10040 = torch.operator "onnx.Concat"(%10038, %10039) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10041 = torch.operator "onnx.Reshape"(%10033, %10040) {torch.onnx.allowzero = 0 : si64} : 
(!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10042 = torch.operator "onnx.Cast"(%9957) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %10043 = torch.operator "onnx.Mul"(%10042, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10044 = torch.operator "onnx.Cast"(%10041) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10045 = torch.operator "onnx.Mul"(%10044, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10046 = torch.operator "onnx.Add"(%10043, %10045) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10047 = torch.operator "onnx.Cast"(%10046) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %10048 = torch.operator "onnx.Shape"(%10002) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %10049 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10050 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10051 = torch.operator "onnx.Slice"(%10048, %10049, %10050) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10052 = torch.operator "onnx.Cast"(%10051) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %10053 = torch.operator "onnx.Sqrt"(%10052) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %10054 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.3_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %10055 = torch.operator "onnx.Cast"(%10053) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %10056 = torch.operator "onnx.Div"(%10054, %10055) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %10057 = torch.operator "onnx.Cast"(%10056) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %10058 = torch.operator "onnx.Transpose"(%10047) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %10059 = torch.operator "onnx.Sqrt"(%10057) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %10060 = torch.operator "onnx.Mul"(%10002, %10059) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %10061 = torch.operator "onnx.Sqrt"(%10057) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %10062 = torch.operator "onnx.Mul"(%10058, %10061) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %10063 = torch.operator "onnx.MatMul"(%10060, %10062) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %10064 = torch.operator "onnx.Softmax"(%10063) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %10065 = torch.operator "onnx.MatMul"(%10064, %9931) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %10066 = torch.operator "onnx.Transpose"(%10065) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %10067 = torch.operator "onnx.Constant"() {torch.onnx.value = 
// Block-3 attention tail: builds a 3-element reshape target, reshapes %10066 with it, then concatenates with %9888 on axis 2.
// The scalar constant payload below is typed tensor<si64> to match its !torch.vtensor<[],si64> result (a bare `tensor` is not a valid type).
dense_resource<__single_transformer_blocks.3_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10068 = torch.operator "onnx.Mul"(%9904, %10067) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10069 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14611_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10070 = torch.operator "onnx.Unsqueeze"(%9891, %10069) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10071 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10072 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14614_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10073 = torch.operator "onnx.Unsqueeze"(%10068, %10072) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10074 = torch.operator "onnx.Concat"(%10070, %10071, %10073) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10075 = torch.operator "onnx.Reshape"(%10066, %10074) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %10076 = torch.operator "onnx.Cast"(%10075) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %10077 = torch.operator "onnx.Concat"(%10076, %9888) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %10078 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.3_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10079 = torch.operator "onnx.Unsqueeze"(%9862, %10078) : 
(!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %10080 = torch.operator "onnx.MatMul"(%10077, %988) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10081 = torch.operator "onnx.Add"(%424, %10080) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10082 = torch.operator "onnx.Mul"(%10079, %10081) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10083 = torch.operator "onnx.Add"(%9844, %10082) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10084 = torch.operator "onnx.Gemm"(%1285, %430, %431) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %10085 = torch.operator "onnx.Shape"(%10084) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %10086 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10087 = torch.operator "onnx.Gather"(%10085, %10086) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10088 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10089 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10090 = torch.operator "onnx.Add"(%10087, %10089) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10091 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10092 = torch.operator "onnx.Div"(%10090, %10091) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10093 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10094 = torch.operator "onnx.Mul"(%10092, %10093) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10095 = torch.operator "onnx.Slice"(%10084, %10088, %10094, %10086) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %10096 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10097 = torch.operator "onnx.Mul"(%10092, %10096) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10098 = torch.operator "onnx.Slice"(%10084, %10094, %10097, %10086) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %10099 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10100 = torch.operator "onnx.Mul"(%10092, %10099) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10101 = torch.operator "onnx.Slice"(%10084, %10097, %10100, %10086) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.42Fnorm2Fnorm2FConstant_attr__value = util.global.load 
// Block-4 norm: LayerNormalization of %10083, scaled by (unsqueezed %10098 + scalar constant); %10095 is unsqueezed for the following Add.
// The scalar constant %10107 is typed tensor<bf16> to match its !torch.vtensor<[],bf16> result.
@"/single_transformer_blocks.4/norm/norm/Constant_attr__value" : tensor<3072xbf16> %10102 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.42Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.42Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.4/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %10103 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.42Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %10104 = torch.operator "onnx.LayerNormalization"(%10083, %10102, %10103) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10105 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10106 = torch.operator "onnx.Unsqueeze"(%10098, %10105) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %10107 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %10108 = torch.operator "onnx.Add"(%10106, %10107) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %10109 = torch.operator "onnx.Mul"(%10104, %10108) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10110 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10111 = torch.operator "onnx.Unsqueeze"(%10095, %10110) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %10112 = 
// Block-4 MLP: MatMul(%10112, %989) + bias %432, then an elementwise chain x*x, x*x*x, c0*x^3, x + .., c1*.., Tanh, c2 + ..
// NOTE(review): this has the shape of a tanh-approximated GELU; the constant values live in dense resources not visible here — confirm.
// Scalar act_mlp constants are typed tensor<bf16> to match their !torch.vtensor<[],bf16> results.
torch.operator "onnx.Add"(%10109, %10111) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10113 = torch.operator "onnx.MatMul"(%10112, %989) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10114 = torch.operator "onnx.Add"(%432, %10113) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10115 = torch.operator "onnx.Mul"(%10114, %10114) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10116 = torch.operator "onnx.Mul"(%10114, %10115) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10117 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %10118 = torch.operator "onnx.Mul"(%10117, %10116) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10119 = torch.operator "onnx.Add"(%10114, %10118) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10120 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %10121 = torch.operator "onnx.Mul"(%10120, %10119) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10122 = torch.operator "onnx.Tanh"(%10121) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10123 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %10124 = torch.operator "onnx.Add"(%10123, %10122) : 
// Finishes the act_mlp elementwise chain, reads a dimension of %10112 via Shape/Gather, then applies three 3072x3072 MatMul + bias projections to %10112.
// Scalar constants are typed to match their results: tensor<bf16> for %10126, tensor<si64> for %10129.
(!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10125 = torch.operator "onnx.Mul"(%10114, %10124) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10126 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %10127 = torch.operator "onnx.Mul"(%10126, %10125) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10128 = torch.operator "onnx.Shape"(%10112) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %10129 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10130 = torch.operator "onnx.Gather"(%10128, %10129) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10131 = torch.operator "onnx.MatMul"(%10112, %990) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10132 = torch.operator "onnx.Add"(%436, %10131) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10133 = torch.operator "onnx.MatMul"(%10112, %991) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10134 = torch.operator "onnx.Add"(%437, %10133) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10135 = torch.operator "onnx.MatMul"(%10112, %992) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10136 = torch.operator "onnx.Add"(%438, %10135) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> 
// Gathers one dimension of %10134, divides it by a scalar constant, and unsqueezes the pieces that later form a 4-element reshape target.
// Scalar index/divisor constants %10138 and %10140 are typed tensor<si64> to match their !torch.vtensor<[],si64> results.
!torch.vtensor<[?,4608,3072],bf16> %10137 = torch.operator "onnx.Shape"(%10134) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %10138 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10139 = torch.operator "onnx.Gather"(%10137, %10138) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10140 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10141 = torch.operator "onnx.Div"(%10139, %10140) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10142 = torch.operator "onnx.Cast"(%10141) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10143 = torch.operator "onnx.Cast"(%10142) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14686_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10145 = torch.operator "onnx.Unsqueeze"(%10130, %10144) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10146 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10147 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10148 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14690_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10149 = torch.operator "onnx.Unsqueeze"(%10143, %10148) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %10150 = torch.operator "onnx.Concat"(%10145, %10146, %10147, %10149) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10151 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14693_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10152 = torch.operator "onnx.Unsqueeze"(%10130, %10151) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10153 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10154 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10155 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14697_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10156 = torch.operator "onnx.Unsqueeze"(%10143, %10155) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10157 = torch.operator "onnx.Concat"(%10152, %10153, %10154, %10156) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10158 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14700_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10159 = torch.operator "onnx.Unsqueeze"(%10130, %10158) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10160 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10161 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10162 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14704_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10163 = torch.operator "onnx.Unsqueeze"(%10143, %10162) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10164 = torch.operator "onnx.Concat"(%10159, %10160, %10161, %10163) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10165 = torch.operator "onnx.Reshape"(%10132, %10150) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10166 = torch.operator "onnx.Transpose"(%10165) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10167 = torch.operator "onnx.Reshape"(%10134, %10157) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10168 = torch.operator "onnx.Transpose"(%10167) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10169 = torch.operator "onnx.Reshape"(%10136, %10164) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10170 = torch.operator "onnx.Transpose"(%10169) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10171 = torch.operator "onnx.Cast"(%10166) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10172 = torch.operator "onnx.Constant"() {torch.onnx.value = 
// norm_q in f32 on %10166: Pow by a scalar, ReduceMean over the last axis (keepdims), Add scalar, Sqrt, scalar / result, Mul back, Cast to bf16, Mul by %434.
// The three scalar constants are typed tensor<f32> to match their !torch.vtensor<[],f32> results.
dense_resource<__single_transformer_blocks.4_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %10173 = torch.operator "onnx.Pow"(%10171, %10172) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10174 = torch.operator "onnx.ReduceMean"(%10173) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10175 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %10176 = torch.operator "onnx.Add"(%10174, %10175) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10177 = torch.operator "onnx.Sqrt"(%10176) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10178 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %10179 = torch.operator "onnx.Div"(%10178, %10177) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10180 = torch.operator "onnx.Cast"(%10166) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10181 = torch.operator "onnx.Mul"(%10180, %10179) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10182 = torch.operator "onnx.Cast"(%10181) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %10183 = torch.operator "onnx.Mul"(%10182, %434) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %10184 = torch.operator "onnx.Cast"(%10168) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10185 = torch.operator "onnx.Constant"() {torch.onnx.value = 
// norm_k in f32 on %10168: same op sequence as norm_q (Pow, ReduceMean, Add, Sqrt, Div, Mul, Cast), then Mul by %435.
// The three scalar constants are typed tensor<f32> to match their !torch.vtensor<[],f32> results.
dense_resource<__single_transformer_blocks.4_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %10186 = torch.operator "onnx.Pow"(%10184, %10185) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10187 = torch.operator "onnx.ReduceMean"(%10186) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10188 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %10189 = torch.operator "onnx.Add"(%10187, %10188) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10190 = torch.operator "onnx.Sqrt"(%10189) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10191 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %10192 = torch.operator "onnx.Div"(%10191, %10190) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10193 = torch.operator "onnx.Cast"(%10168) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10194 = torch.operator "onnx.Mul"(%10193, %10192) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10195 = torch.operator "onnx.Cast"(%10194) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %10196 = torch.operator "onnx.Mul"(%10195, %435) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %10197 = torch.operator "onnx.Shape"(%10183) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10198 = torch.operator "onnx.Constant"() {torch.onnx.value = 
// Reads three dimensions of %10183 via Shape/Gather and unsqueezes each to a 1-element tensor for a later Concat.
// The scalar Gather indices are typed tensor<si64> to match their !torch.vtensor<[],si64> results.
dense_resource<__single_transformer_blocks.4_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10199 = torch.operator "onnx.Gather"(%10197, %10198) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10200 = torch.operator "onnx.Shape"(%10183) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10201 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10202 = torch.operator "onnx.Gather"(%10200, %10201) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10203 = torch.operator "onnx.Shape"(%10183) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10204 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10205 = torch.operator "onnx.Gather"(%10203, %10204) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10206 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14748_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10207 = torch.operator "onnx.Unsqueeze"(%10199, %10206) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10208 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14750_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10209 = torch.operator "onnx.Unsqueeze"(%10202, %10208) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10210 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14752_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10211 = torch.operator "onnx.Unsqueeze"(%10205, %10210) : 
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10212 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10213 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10214 = torch.operator "onnx.Concat"(%10207, %10209, %10211, %10212, %10213) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %10215 = torch.operator "onnx.Reshape"(%10183, %10214) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10216 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10217:2 = torch.operator "onnx.Split"(%10215, %10216) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10218 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10219 = torch.operator "onnx.Squeeze"(%10217#0, %10218) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10220 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10221 = torch.operator "onnx.Squeeze"(%10217#1, %10220) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?],bf16> %10222 = torch.operator "onnx.Neg"(%10221) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10223 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10224 = torch.operator "onnx.Unsqueeze"(%10222, %10223) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10225 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10226 = torch.operator "onnx.Unsqueeze"(%10219, %10225) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10227 = torch.operator "onnx.Concat"(%10224, %10226) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10228 = torch.operator "onnx.Shape"(%10227) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10230 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10231 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10232 = torch.operator "onnx.Slice"(%10228, %10230, %10231, %10229) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10233 = torch.operator "onnx.Constant"() {torch.onnx.value = 
// Reshapes %10227 back to rank 4, then computes Cast(%10183)*%1455 + Cast(%10235)*%1456 in f32 and casts to bf16; starts Shape/Gather on %10196.
// NOTE(review): the mul/add pair looks like a rotary-embedding application; what %1455/%1456 hold is not visible here — confirm.
// The scalar Gather index %10243 is typed tensor<si64> to match its !torch.vtensor<[],si64> result.
dense_resource<__single_transformer_blocks.4_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10234 = torch.operator "onnx.Concat"(%10232, %10233) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10235 = torch.operator "onnx.Reshape"(%10227, %10234) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10236 = torch.operator "onnx.Cast"(%10183) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %10237 = torch.operator "onnx.Mul"(%10236, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10238 = torch.operator "onnx.Cast"(%10235) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10239 = torch.operator "onnx.Mul"(%10238, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10240 = torch.operator "onnx.Add"(%10237, %10239) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10241 = torch.operator "onnx.Cast"(%10240) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %10242 = torch.operator "onnx.Shape"(%10196) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10243 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10244 = torch.operator "onnx.Gather"(%10242, %10243) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10245 = torch.operator "onnx.Shape"(%10196) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10246 = torch.operator "onnx.Constant"() 
// Reads two more dimensions of %10196 via Shape/Gather and unsqueezes the gathered scalars to 1-element tensors for a later Concat.
// The scalar Gather indices are typed tensor<si64> to match their !torch.vtensor<[],si64> results.
{torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10247 = torch.operator "onnx.Gather"(%10245, %10246) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10248 = torch.operator "onnx.Shape"(%10196) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10249 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %10250 = torch.operator "onnx.Gather"(%10248, %10249) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10251 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14793_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10252 = torch.operator "onnx.Unsqueeze"(%10244, %10251) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10253 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14795_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10254 = torch.operator "onnx.Unsqueeze"(%10247, %10253) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10255 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14797_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10256 = torch.operator "onnx.Unsqueeze"(%10250, %10255) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10257 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10258 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %10259 = torch.operator "onnx.Concat"(%10252, %10254, %10256, %10257, %10258) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %10260 = torch.operator "onnx.Reshape"(%10196, %10259) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10261 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10262:2 = torch.operator "onnx.Split"(%10260, %10261) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10263 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10264 = torch.operator "onnx.Squeeze"(%10262#0, %10263) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10265 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10266 = torch.operator "onnx.Squeeze"(%10262#1, %10265) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10267 = torch.operator "onnx.Neg"(%10266) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10268 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10269 = torch.operator "onnx.Unsqueeze"(%10267, %10268) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?,1],bf16> %10270 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10271 = torch.operator "onnx.Unsqueeze"(%10264, %10270) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10272 = torch.operator "onnx.Concat"(%10269, %10271) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10273 = torch.operator "onnx.Shape"(%10272) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10274 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10275 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10276 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10277 = torch.operator "onnx.Slice"(%10273, %10275, %10276, %10274) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10278 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10279 = torch.operator "onnx.Concat"(%10277, %10278) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10280 = torch.operator "onnx.Reshape"(%10272, %10279) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10281 = 
torch.operator "onnx.Cast"(%10196) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %10282 = torch.operator "onnx.Mul"(%10281, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10283 = torch.operator "onnx.Cast"(%10280) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10284 = torch.operator "onnx.Mul"(%10283, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10285 = torch.operator "onnx.Add"(%10282, %10284) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10286 = torch.operator "onnx.Cast"(%10285) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %10287 = torch.operator "onnx.Shape"(%10241) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %10288 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10289 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10290 = torch.operator "onnx.Slice"(%10287, %10288, %10289) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10291 = torch.operator "onnx.Cast"(%10290) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %10292 = torch.operator "onnx.Sqrt"(%10291) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %10293 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %10294 = 
torch.operator "onnx.Cast"(%10292) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %10295 = torch.operator "onnx.Div"(%10293, %10294) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %10296 = torch.operator "onnx.Cast"(%10295) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %10297 = torch.operator "onnx.Transpose"(%10286) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %10298 = torch.operator "onnx.Sqrt"(%10296) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %10299 = torch.operator "onnx.Mul"(%10241, %10298) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %10300 = torch.operator "onnx.Sqrt"(%10296) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %10301 = torch.operator "onnx.Mul"(%10297, %10300) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %10302 = torch.operator "onnx.MatMul"(%10299, %10301) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %10303 = torch.operator "onnx.Softmax"(%10302) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %10304 = torch.operator "onnx.MatMul"(%10303, %10170) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %10305 = torch.operator "onnx.Transpose"(%10304) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %10306 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10307 = torch.operator "onnx.Mul"(%10143, %10306) : 
(!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10308 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14850_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10309 = torch.operator "onnx.Unsqueeze"(%10130, %10308) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10310 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10311 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14853_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10312 = torch.operator "onnx.Unsqueeze"(%10307, %10311) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10313 = torch.operator "onnx.Concat"(%10309, %10310, %10312) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10314 = torch.operator "onnx.Reshape"(%10305, %10313) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %10315 = torch.operator "onnx.Cast"(%10314) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %10316 = torch.operator "onnx.Concat"(%10315, %10127) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %10317 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.4_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10318 = torch.operator "onnx.Unsqueeze"(%10101, %10317) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %10319 = torch.operator "onnx.MatMul"(%10316, %993) : (!torch.vtensor<[?,4608,?],bf16>, 
!torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10320 = torch.operator "onnx.Add"(%433, %10319) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10321 = torch.operator "onnx.Mul"(%10318, %10320) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10322 = torch.operator "onnx.Add"(%10083, %10321) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10323 = torch.operator "onnx.Gemm"(%1285, %439, %440) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %10324 = torch.operator "onnx.Shape"(%10323) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %10325 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10326 = torch.operator "onnx.Gather"(%10324, %10325) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10327 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10328 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10329 = torch.operator "onnx.Add"(%10326, %10328) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10330 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%10331 = torch.operator "onnx.Div"(%10329, %10330) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10332 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10333 = torch.operator "onnx.Mul"(%10331, %10332) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10334 = torch.operator "onnx.Slice"(%10323, %10327, %10333, %10325) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %10335 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10336 = torch.operator "onnx.Mul"(%10331, %10335) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10337 = torch.operator "onnx.Slice"(%10323, %10333, %10336, %10325) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %10338 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10339 = torch.operator "onnx.Mul"(%10331, %10338) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10340 = torch.operator "onnx.Slice"(%10323, %10336, %10339, %10325) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.52Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.5/norm/norm/Constant_attr__value" : tensor<3072xbf16> %10341 = torch_c.from_builtin_tensor 
%_2Fsingle_transformer_blocks.52Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.52Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.5/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %10342 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.52Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %10343 = torch.operator "onnx.LayerNormalization"(%10322, %10341, %10342) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10344 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10345 = torch.operator "onnx.Unsqueeze"(%10337, %10344) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %10346 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %10347 = torch.operator "onnx.Add"(%10345, %10346) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %10348 = torch.operator "onnx.Mul"(%10343, %10347) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10349 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10350 = torch.operator "onnx.Unsqueeze"(%10334, %10349) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %10351 = torch.operator "onnx.Add"(%10348, %10350) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %10352 = torch.operator "onnx.MatMul"(%10351, %994) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10353 = torch.operator "onnx.Add"(%441, %10352) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10354 = torch.operator "onnx.Mul"(%10353, %10353) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10355 = torch.operator "onnx.Mul"(%10353, %10354) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10356 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %10357 = torch.operator "onnx.Mul"(%10356, %10355) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10358 = torch.operator "onnx.Add"(%10353, %10357) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10359 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %10360 = torch.operator "onnx.Mul"(%10359, %10358) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10361 = torch.operator "onnx.Tanh"(%10360) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10362 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %10363 = torch.operator "onnx.Add"(%10362, %10361) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10364 = 
torch.operator "onnx.Mul"(%10353, %10363) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10365 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %10366 = torch.operator "onnx.Mul"(%10365, %10364) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %10367 = torch.operator "onnx.Shape"(%10351) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %10368 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10369 = torch.operator "onnx.Gather"(%10367, %10368) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10370 = torch.operator "onnx.MatMul"(%10351, %995) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10371 = torch.operator "onnx.Add"(%445, %10370) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10372 = torch.operator "onnx.MatMul"(%10351, %996) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10373 = torch.operator "onnx.Add"(%446, %10372) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10374 = torch.operator "onnx.MatMul"(%10351, %997) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10375 = torch.operator "onnx.Add"(%447, %10374) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10376 = torch.operator "onnx.Shape"(%10373) : (!torch.vtensor<[?,4608,3072],bf16>) -> 
!torch.vtensor<[3],si64> %10377 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10378 = torch.operator "onnx.Gather"(%10376, %10377) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10379 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10380 = torch.operator "onnx.Div"(%10378, %10379) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10381 = torch.operator "onnx.Cast"(%10380) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10382 = torch.operator "onnx.Cast"(%10381) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10383 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14925_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10384 = torch.operator "onnx.Unsqueeze"(%10369, %10383) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10385 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10386 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10387 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14929_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10388 = torch.operator "onnx.Unsqueeze"(%10382, %10387) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10389 = torch.operator "onnx.Concat"(%10384, %10385, %10386, %10388) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10390 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14932_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10391 = torch.operator "onnx.Unsqueeze"(%10369, %10390) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10392 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10393 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10394 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14936_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10395 = torch.operator "onnx.Unsqueeze"(%10382, %10394) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10396 = torch.operator "onnx.Concat"(%10391, %10392, %10393, %10395) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10397 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14939_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10398 = torch.operator "onnx.Unsqueeze"(%10369, %10397) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10399 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10400 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %10401 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14943_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10402 = torch.operator "onnx.Unsqueeze"(%10382, %10401) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10403 = torch.operator "onnx.Concat"(%10398, %10399, %10400, %10402) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10404 = torch.operator "onnx.Reshape"(%10371, %10389) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10405 = torch.operator "onnx.Transpose"(%10404) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10406 = torch.operator "onnx.Reshape"(%10373, %10396) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10407 = torch.operator "onnx.Transpose"(%10406) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10408 = torch.operator "onnx.Reshape"(%10375, %10403) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10409 = torch.operator "onnx.Transpose"(%10408) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10410 = torch.operator "onnx.Cast"(%10405) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10411 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10412 = 
torch.operator "onnx.Pow"(%10410, %10411) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10413 = torch.operator "onnx.ReduceMean"(%10412) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10414 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10415 = torch.operator "onnx.Add"(%10413, %10414) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10416 = torch.operator "onnx.Sqrt"(%10415) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10417 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10418 = torch.operator "onnx.Div"(%10417, %10416) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10419 = torch.operator "onnx.Cast"(%10405) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10420 = torch.operator "onnx.Mul"(%10419, %10418) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10421 = torch.operator "onnx.Cast"(%10420) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %10422 = torch.operator "onnx.Mul"(%10421, %443) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %10423 = torch.operator "onnx.Cast"(%10407) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10424 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10425 = torch.operator 
"onnx.Pow"(%10423, %10424) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10426 = torch.operator "onnx.ReduceMean"(%10425) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10427 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10428 = torch.operator "onnx.Add"(%10426, %10427) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10429 = torch.operator "onnx.Sqrt"(%10428) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10430 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10431 = torch.operator "onnx.Div"(%10430, %10429) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10432 = torch.operator "onnx.Cast"(%10407) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10433 = torch.operator "onnx.Mul"(%10432, %10431) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10434 = torch.operator "onnx.Cast"(%10433) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %10435 = torch.operator "onnx.Mul"(%10434, %444) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %10436 = torch.operator "onnx.Shape"(%10422) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10437 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10438 = torch.operator "onnx.Gather"(%10436, %10437) {torch.onnx.axis = 
0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10439 = torch.operator "onnx.Shape"(%10422) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10440 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10441 = torch.operator "onnx.Gather"(%10439, %10440) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10442 = torch.operator "onnx.Shape"(%10422) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10443 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10444 = torch.operator "onnx.Gather"(%10442, %10443) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10445 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14987_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10446 = torch.operator "onnx.Unsqueeze"(%10438, %10445) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10447 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14989_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10448 = torch.operator "onnx.Unsqueeze"(%10441, %10447) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10449 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_14991_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10450 = torch.operator "onnx.Unsqueeze"(%10444, %10449) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10451 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.5_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10452 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10453 = torch.operator "onnx.Concat"(%10446, %10448, %10450, %10451, %10452) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %10454 = torch.operator "onnx.Reshape"(%10422, %10453) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10455 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10456:2 = torch.operator "onnx.Split"(%10454, %10455) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10457 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10458 = torch.operator "onnx.Squeeze"(%10456#0, %10457) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10459 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10460 = torch.operator "onnx.Squeeze"(%10456#1, %10459) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10461 = torch.operator "onnx.Neg"(%10460) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10462 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10463 = torch.operator "onnx.Unsqueeze"(%10461, %10462) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10464 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10465 = torch.operator "onnx.Unsqueeze"(%10458, %10464) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10466 = torch.operator "onnx.Concat"(%10463, %10465) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10467 = torch.operator "onnx.Shape"(%10466) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10469 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10470 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10471 = torch.operator "onnx.Slice"(%10467, %10469, %10470, %10468) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10472 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10473 = torch.operator "onnx.Concat"(%10471, %10472) {torch.onnx.axis = 0 : si64} 
: (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Collapse the 5-D tensor %10466 back to 4-D using the shape built in %10473.
%10474 = torch.operator "onnx.Reshape"(%10466, %10473) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// %10480 = bf16( f32(%10422) * %1455 + f32(%10474) * %1456 ); the arithmetic is done in f32.
%10475 = torch.operator "onnx.Cast"(%10422) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%10476 = torch.operator "onnx.Mul"(%10475, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%10477 = torch.operator "onnx.Cast"(%10474) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%10478 = torch.operator "onnx.Mul"(%10477, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%10479 = torch.operator "onnx.Add"(%10476, %10478) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%10480 = torch.operator "onnx.Cast"(%10479) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// Start reading entries of %10435's shape. The rank-0 index constants are typed
// tensor<si64> to match their !torch.vtensor<[],si64> results.
%10481 = torch.operator "onnx.Shape"(%10435) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%10482 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10483 = torch.operator "onnx.Gather"(%10481, %10482) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10484 = torch.operator "onnx.Shape"(%10435) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%10485 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10486 = torch.operator "onnx.Gather"(%10484, %10485) {torch.onnx.axis = 0 
: si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Third shape entry of %10435. The rank-0 index constant is typed tensor<si64>
// to match its !torch.vtensor<[],si64> result.
%10487 = torch.operator "onnx.Shape"(%10435) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%10488 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10489 = torch.operator "onnx.Gather"(%10487, %10488) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Promote the three gathered scalars (%10483, %10486, %10489) to 1-element tensors.
%10490 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15032_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10491 = torch.operator "onnx.Unsqueeze"(%10483, %10490) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10492 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15034_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10493 = torch.operator "onnx.Unsqueeze"(%10486, %10492) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10494 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15036_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10495 = torch.operator "onnx.Unsqueeze"(%10489, %10494) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// Two constant entries complete the 5-element target shape assembled by the Concat.
%10496 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10497 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10498 = torch.operator "onnx.Concat"(%10491, %10493, %10495, %10496, %10497) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %10499 = torch.operator "onnx.Reshape"(%10435, %10498) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10501:2 = torch.operator "onnx.Split"(%10499, %10500) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10502 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10503 = torch.operator "onnx.Squeeze"(%10501#0, %10502) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10504 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10505 = torch.operator "onnx.Squeeze"(%10501#1, %10504) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10506 = torch.operator "onnx.Neg"(%10505) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10507 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10508 = torch.operator "onnx.Unsqueeze"(%10506, %10507) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10509 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %10510 = torch.operator "onnx.Unsqueeze"(%10503, %10509) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10511 = torch.operator "onnx.Concat"(%10508, %10510) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10512 = torch.operator "onnx.Shape"(%10511) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10513 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10514 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10515 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10516 = torch.operator "onnx.Slice"(%10512, %10514, %10515, %10513) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10517 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10518 = torch.operator "onnx.Concat"(%10516, %10517) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10519 = torch.operator "onnx.Reshape"(%10511, %10518) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10520 = torch.operator "onnx.Cast"(%10435) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %10521 = torch.operator "onnx.Mul"(%10520, %1455) : 
(!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10522 = torch.operator "onnx.Cast"(%10519) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10523 = torch.operator "onnx.Mul"(%10522, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10524 = torch.operator "onnx.Add"(%10521, %10523) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10525 = torch.operator "onnx.Cast"(%10524) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %10526 = torch.operator "onnx.Shape"(%10480) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %10527 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10528 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10529 = torch.operator "onnx.Slice"(%10526, %10527, %10528) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10530 = torch.operator "onnx.Cast"(%10529) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %10531 = torch.operator "onnx.Sqrt"(%10530) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %10532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %10533 = torch.operator "onnx.Cast"(%10531) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %10534 = torch.operator "onnx.Div"(%10532, %10533) : (!torch.vtensor<[1],f32>, 
!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
// %10535 is the bf16 scale; sqrt(%10535) is applied to each MatMul operand
// separately (%10480 and the transposed %10525) rather than once to the product.
%10535 = torch.operator "onnx.Cast"(%10534) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>
%10536 = torch.operator "onnx.Transpose"(%10525) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
%10537 = torch.operator "onnx.Sqrt"(%10535) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%10538 = torch.operator "onnx.Mul"(%10480, %10537) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%10539 = torch.operator "onnx.Sqrt"(%10535) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%10540 = torch.operator "onnx.Mul"(%10536, %10539) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Scores = MatMul of the scaled operands, Softmax over the last axis, then MatMul with %10409.
%10541 = torch.operator "onnx.MatMul"(%10538, %10540) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%10542 = torch.operator "onnx.Softmax"(%10541) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%10543 = torch.operator "onnx.MatMul"(%10542, %10409) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
// Swap axes 1 and 2 of the result.
%10544 = torch.operator "onnx.Transpose"(%10543) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// Rank-0 multiplier, typed tensor<si64> to match its !torch.vtensor<[],si64> result.
%10545 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10546 = torch.operator "onnx.Mul"(%10382, %10545) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10547 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15089_attr__value> : tensor<1xsi64>} : () 
-> !torch.vtensor<[1],si64> %10548 = torch.operator "onnx.Unsqueeze"(%10369, %10547) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10549 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10550 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15092_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10551 = torch.operator "onnx.Unsqueeze"(%10546, %10550) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10552 = torch.operator "onnx.Concat"(%10548, %10549, %10551) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10553 = torch.operator "onnx.Reshape"(%10544, %10552) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %10554 = torch.operator "onnx.Cast"(%10553) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %10555 = torch.operator "onnx.Concat"(%10554, %10366) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %10556 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.5_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10557 = torch.operator "onnx.Unsqueeze"(%10340, %10556) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %10558 = torch.operator "onnx.MatMul"(%10555, %998) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10559 = torch.operator "onnx.Add"(%442, %10558) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %10560 = torch.operator "onnx.Mul"(%10557, %10559) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10561 = torch.operator "onnx.Add"(%10322, %10560) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10562 = torch.operator "onnx.Gemm"(%1285, %448, %449) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %10563 = torch.operator "onnx.Shape"(%10562) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %10564 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10565 = torch.operator "onnx.Gather"(%10563, %10564) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10566 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10567 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10568 = torch.operator "onnx.Add"(%10565, %10567) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10569 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10570 = torch.operator "onnx.Div"(%10568, %10569) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10571 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.6_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10572 = torch.operator "onnx.Mul"(%10570, %10571) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10573 = torch.operator "onnx.Slice"(%10562, %10566, %10572, %10564) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %10574 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10575 = torch.operator "onnx.Mul"(%10570, %10574) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10576 = torch.operator "onnx.Slice"(%10562, %10572, %10575, %10564) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %10577 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10578 = torch.operator "onnx.Mul"(%10570, %10577) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10579 = torch.operator "onnx.Slice"(%10562, %10575, %10578, %10564) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.62Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.6/norm/norm/Constant_attr__value" : tensor<3072xbf16> %10580 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.62Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.62Fnorm2Fnorm2FConstant_1_attr__value = util.global.load 
@"/single_transformer_blocks.6/norm/norm/Constant_1_attr__value" : tensor<3072xbf16>
%10581 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.62Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// Layer-normalise %10561 over the last axis with scale %10580 and bias %10581.
%10582 = torch.operator "onnx.LayerNormalization"(%10561, %10580, %10581) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// %10590 = %10582 * (unsqueeze(%10576) + c) + unsqueeze(%10573), where c is the
// rank-0 bf16 constant %10585 (typed tensor<bf16> to match its result type).
%10583 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10584 = torch.operator "onnx.Unsqueeze"(%10576, %10583) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%10585 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10586 = torch.operator "onnx.Add"(%10584, %10585) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%10587 = torch.operator "onnx.Mul"(%10582, %10586) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10588 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10589 = torch.operator "onnx.Unsqueeze"(%10573, %10588) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%10590 = torch.operator "onnx.Add"(%10587, %10589) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// Project %10590 from 3072 to 12288 features with weight %999.
%10591 = torch.operator "onnx.MatMul"(%10590, %999) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10592 = 
torch.operator "onnx.Add"(%450, %10591) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// With x = %10592 (bias %450 + projection), this chain evaluates
//   %10603 = x * (c2 + tanh(c1 * (x + c0 * x^3)))
// where c0 = %10595, c1 = %10598, c2 = %10601 are rank-0 bf16 constants. Their
// attribute type is written tensor<bf16> to match the !torch.vtensor<[],bf16> results.
%10593 = torch.operator "onnx.Mul"(%10592, %10592) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10594 = torch.operator "onnx.Mul"(%10592, %10593) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10595 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10596 = torch.operator "onnx.Mul"(%10595, %10594) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10597 = torch.operator "onnx.Add"(%10592, %10596) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10598 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10599 = torch.operator "onnx.Mul"(%10598, %10597) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10600 = torch.operator "onnx.Tanh"(%10599) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10601 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10602 = torch.operator "onnx.Add"(%10601, %10600) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10603 = torch.operator "onnx.Mul"(%10592, %10602) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10604 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__single_transformer_blocks.6_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
// Final scalar scale of the activation output %10603 (the constant above is rank-0
// bf16, hence tensor<bf16>).
%10605 = torch.operator "onnx.Mul"(%10604, %10603) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// One entry of %10590's shape, selected by a rank-0 si64 index constant.
%10606 = torch.operator "onnx.Shape"(%10590) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%10607 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10608 = torch.operator "onnx.Gather"(%10606, %10607) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three 3072x3072 projections of %10590 (weights %1000, %1001, %1002), each
// followed by a bias add (%454, %455, %456).
%10609 = torch.operator "onnx.MatMul"(%10590, %1000) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10610 = torch.operator "onnx.Add"(%454, %10609) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10611 = torch.operator "onnx.MatMul"(%10590, %1001) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10612 = torch.operator "onnx.Add"(%455, %10611) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10613 = torch.operator "onnx.MatMul"(%10590, %1002) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10614 = torch.operator "onnx.Add"(%456, %10613) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// One entry of %10612's shape, again indexed by a rank-0 si64 constant.
%10615 = torch.operator "onnx.Shape"(%10612) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%10616 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10617 
= torch.operator "onnx.Gather"(%10615, %10616) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Integer-divide the gathered dimension by a rank-0 si64 constant (typed
// tensor<si64> to match its result). The two Casts to si64 are no-ops kept as exported.
%10618 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10619 = torch.operator "onnx.Div"(%10617, %10618) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10620 = torch.operator "onnx.Cast"(%10619) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10621 = torch.operator "onnx.Cast"(%10620) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Assemble the 4-element shape [%10608, const, const, %10621].
%10622 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15164_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10623 = torch.operator "onnx.Unsqueeze"(%10608, %10622) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10624 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10625 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10626 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15168_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10627 = torch.operator "onnx.Unsqueeze"(%10621, %10626) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10628 = torch.operator "onnx.Concat"(%10623, %10624, %10625, %10627) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%10629 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_15171_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// Second 4-element shape [%10608, const, const, %10621].
%10630 = torch.operator "onnx.Unsqueeze"(%10608, %10629) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10631 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10632 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10633 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15175_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10634 = torch.operator "onnx.Unsqueeze"(%10621, %10633) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10635 = torch.operator "onnx.Concat"(%10630, %10631, %10632, %10634) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Third 4-element shape [%10608, const, const, %10621].
%10636 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15178_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10637 = torch.operator "onnx.Unsqueeze"(%10608, %10636) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10638 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10639 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10640 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15182_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10641 = torch.operator "onnx.Unsqueeze"(%10621, %10640) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10642 = torch.operator "onnx.Concat"(%10637, %10638, %10639, %10641) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Reshape each projection (%10610, %10612, %10614) to 4-D with its own shape
// tensor, then swap axes 1 and 2.
%10643 = torch.operator "onnx.Reshape"(%10610, %10628) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%10644 = torch.operator "onnx.Transpose"(%10643) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%10645 = torch.operator "onnx.Reshape"(%10612, %10635) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%10646 = torch.operator "onnx.Transpose"(%10645) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%10647 = torch.operator "onnx.Reshape"(%10614, %10642) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%10648 = torch.operator "onnx.Transpose"(%10647) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Raise f32(%10644) to the power held in a rank-0 f32 constant (typed tensor<f32>
// to match its !torch.vtensor<[],f32> result), then take the mean over the last axis.
%10649 = torch.operator "onnx.Cast"(%10644) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%10650 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%10651 = torch.operator "onnx.Pow"(%10649, %10650) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%10652 = torch.operator "onnx.ReduceMean"(%10651) {torch.onnx.axes = [-1 : si64], 
torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// %10657 = %10656 / sqrt(%10652 + %10653); both scalars are rank-0 f32 constants,
// typed tensor<f32> to match their !torch.vtensor<[],f32> results.
%10653 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%10654 = torch.operator "onnx.Add"(%10652, %10653) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%10655 = torch.operator "onnx.Sqrt"(%10654) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%10656 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%10657 = torch.operator "onnx.Div"(%10656, %10655) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Scale f32(%10644) by that factor, cast back to bf16, then multiply by the 128-element weight %452.
%10658 = torch.operator "onnx.Cast"(%10644) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%10659 = torch.operator "onnx.Mul"(%10658, %10657) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%10660 = torch.operator "onnx.Cast"(%10659) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%10661 = torch.operator "onnx.Mul"(%10660, %452) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Same power-then-mean sequence, now on %10646.
%10662 = torch.operator "onnx.Cast"(%10646) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%10663 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%10664 = torch.operator "onnx.Pow"(%10662, %10663) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%10665 = torch.operator "onnx.ReduceMean"(%10664) {torch.onnx.axes = [-1 : si64], 
torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10667 = torch.operator "onnx.Add"(%10665, %10666) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10668 = torch.operator "onnx.Sqrt"(%10667) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10669 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10670 = torch.operator "onnx.Div"(%10669, %10668) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10671 = torch.operator "onnx.Cast"(%10646) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10672 = torch.operator "onnx.Mul"(%10671, %10670) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10673 = torch.operator "onnx.Cast"(%10672) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %10674 = torch.operator "onnx.Mul"(%10673, %453) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %10675 = torch.operator "onnx.Shape"(%10661) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10676 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10677 = torch.operator "onnx.Gather"(%10675, %10676) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10678 = torch.operator "onnx.Shape"(%10661) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> 
%10679 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10680 = torch.operator "onnx.Gather"(%10678, %10679) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10681 = torch.operator "onnx.Shape"(%10661) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10682 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10683 = torch.operator "onnx.Gather"(%10681, %10682) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10684 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15226_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10685 = torch.operator "onnx.Unsqueeze"(%10677, %10684) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10686 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15228_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10687 = torch.operator "onnx.Unsqueeze"(%10680, %10686) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10688 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15230_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10689 = torch.operator "onnx.Unsqueeze"(%10683, %10688) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10690 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10691 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.6_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10692 = torch.operator "onnx.Concat"(%10685, %10687, %10689, %10690, %10691) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %10693 = torch.operator "onnx.Reshape"(%10661, %10692) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10694 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10695:2 = torch.operator "onnx.Split"(%10693, %10694) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10696 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10697 = torch.operator "onnx.Squeeze"(%10695#0, %10696) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10699 = torch.operator "onnx.Squeeze"(%10695#1, %10698) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10700 = torch.operator "onnx.Neg"(%10699) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10701 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10702 = torch.operator 
"onnx.Unsqueeze"(%10700, %10701) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10703 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10704 = torch.operator "onnx.Unsqueeze"(%10697, %10703) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10705 = torch.operator "onnx.Concat"(%10702, %10704) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10706 = torch.operator "onnx.Shape"(%10705) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10707 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10708 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10709 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10710 = torch.operator "onnx.Slice"(%10706, %10708, %10709, %10707) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10711 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10712 = torch.operator "onnx.Concat"(%10710, %10711) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10713 = torch.operator "onnx.Reshape"(%10705, %10712) {torch.onnx.allowzero = 0 : si64} : 
(!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// The line above is the type signature of the Reshape (%10713) begun on the preceding line.
// Combine the q path in f32: %10661 * %1455 + %10713 * %1456, then cast the sum to bf16 (ONNX dtype 16).
// NOTE(review): %1455 / %1456 are [1,1,4608,128] f32 tables defined outside this chunk; presumably the rotary cos / sin tables - confirm.
%10714 = torch.operator "onnx.Cast"(%10661) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%10715 = torch.operator "onnx.Mul"(%10714, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%10716 = torch.operator "onnx.Cast"(%10713) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%10717 = torch.operator "onnx.Mul"(%10716, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%10718 = torch.operator "onnx.Add"(%10715, %10717) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%10719 = torch.operator "onnx.Cast"(%10718) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// Same preparation for the k path (%10674): read three shape entries via scalar Gather indices.
// Rank-0 index constants carry an explicit tensor<si64> value type matching their [],si64 result.
// NOTE(review): the index values are dense_resource scalars not visible here; presumably 0, 1, 2 - confirm.
%10720 = torch.operator "onnx.Shape"(%10674) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%10721 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10722 = torch.operator "onnx.Gather"(%10720, %10721) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10723 = torch.operator "onnx.Shape"(%10674) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%10724 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10725 = torch.operator "onnx.Gather"(%10723, %10724) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10726 = torch.operator "onnx.Shape"(%10674) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%10727 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10728 = torch.operator "onnx.Gather"(%10726, %10727) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Lift the gathered scalars to 1-element vectors and append two constant entries to form a 5-element reshape target.
%10729 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15271_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10730 = torch.operator "onnx.Unsqueeze"(%10722, %10729) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10731 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15273_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10732 = torch.operator "onnx.Unsqueeze"(%10725, %10731) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10733 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15275_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10734 = torch.operator "onnx.Unsqueeze"(%10728, %10733) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10735 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10736 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10737 = torch.operator "onnx.Concat"(%10730, %10732, %10734, %10735, %10736) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>
%10738 = torch.operator "onnx.Reshape"(%10674, %10737) {torch.onnx.allowzero = 0 : si64} :
(!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10739 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10740:2 = torch.operator "onnx.Split"(%10738, %10739) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10741 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10742 = torch.operator "onnx.Squeeze"(%10740#0, %10741) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10743 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10744 = torch.operator "onnx.Squeeze"(%10740#1, %10743) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10745 = torch.operator "onnx.Neg"(%10744) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10746 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10747 = torch.operator "onnx.Unsqueeze"(%10745, %10746) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10748 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10749 = torch.operator "onnx.Unsqueeze"(%10742, %10748) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10750 = 
torch.operator "onnx.Concat"(%10747, %10749) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10751 = torch.operator "onnx.Shape"(%10750) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10752 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10753 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10754 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10755 = torch.operator "onnx.Slice"(%10751, %10753, %10754, %10752) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10756 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10757 = torch.operator "onnx.Concat"(%10755, %10756) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10758 = torch.operator "onnx.Reshape"(%10750, %10757) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10759 = torch.operator "onnx.Cast"(%10674) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %10760 = torch.operator "onnx.Mul"(%10759, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10761 = torch.operator "onnx.Cast"(%10758) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10762 = torch.operator "onnx.Mul"(%10761, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10763 = torch.operator "onnx.Add"(%10760, %10762) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10764 = torch.operator "onnx.Cast"(%10763) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %10765 = torch.operator "onnx.Shape"(%10719) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %10766 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10767 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10768 = torch.operator "onnx.Slice"(%10765, %10766, %10767) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10769 = torch.operator "onnx.Cast"(%10768) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %10770 = torch.operator "onnx.Sqrt"(%10769) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %10771 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %10772 = torch.operator "onnx.Cast"(%10770) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %10773 = torch.operator "onnx.Div"(%10771, %10772) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %10774 = torch.operator "onnx.Cast"(%10773) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> 
// Attention core: swap the last two axes of the k tensor (%10764) so it can be right-multiplied.
%10775 = torch.operator "onnx.Transpose"(%10764) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Both MatMul operands are multiplied by sqrt(%10774), so their product is scaled by %10774 overall.
%10776 = torch.operator "onnx.Sqrt"(%10774) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%10777 = torch.operator "onnx.Mul"(%10719, %10776) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%10778 = torch.operator "onnx.Sqrt"(%10774) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%10779 = torch.operator "onnx.Mul"(%10775, %10778) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Scores [?,?,4608,4608], softmax over the last axis, then weight the third head-split tensor (%10648).
%10780 = torch.operator "onnx.MatMul"(%10777, %10779) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%10781 = torch.operator "onnx.Softmax"(%10780) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%10782 = torch.operator "onnx.MatMul"(%10781, %10648) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
// Swap axes 1 and 2 back (perm [0,2,1,3]) ahead of the reshape that follows.
%10783 = torch.operator "onnx.Transpose"(%10782) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// Scalar product %10621 * const builds one entry of the reshape target.
// The rank-0 constant carries an explicit tensor<si64> value type matching its [],si64 result.
// NOTE(review): the multiplier value is a dense_resource scalar not visible here.
%10784 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10785 = torch.operator "onnx.Mul"(%10621, %10784) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10786 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15328_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10787 = torch.operator "onnx.Unsqueeze"(%10608, %10786) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10788 =
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10789 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15331_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10790 = torch.operator "onnx.Unsqueeze"(%10785, %10789) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10791 = torch.operator "onnx.Concat"(%10787, %10788, %10790) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10792 = torch.operator "onnx.Reshape"(%10783, %10791) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %10793 = torch.operator "onnx.Cast"(%10792) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %10794 = torch.operator "onnx.Concat"(%10793, %10605) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %10795 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.6_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10796 = torch.operator "onnx.Unsqueeze"(%10579, %10795) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %10797 = torch.operator "onnx.MatMul"(%10794, %1003) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10798 = torch.operator "onnx.Add"(%451, %10797) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10799 = torch.operator "onnx.Mul"(%10796, %10798) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10800 = 
torch.operator "onnx.Add"(%10561, %10799) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %10801 = torch.operator "onnx.Gemm"(%1285, %457, %458) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %10802 = torch.operator "onnx.Shape"(%10801) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %10803 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10804 = torch.operator "onnx.Gather"(%10802, %10803) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10805 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10806 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10807 = torch.operator "onnx.Add"(%10804, %10806) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10808 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10809 = torch.operator "onnx.Div"(%10807, %10808) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10810 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10811 = torch.operator "onnx.Mul"(%10809, %10810) : 
(!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10812 = torch.operator "onnx.Slice"(%10801, %10805, %10811, %10803) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %10813 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10814 = torch.operator "onnx.Mul"(%10809, %10813) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10815 = torch.operator "onnx.Slice"(%10801, %10811, %10814, %10803) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %10816 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10817 = torch.operator "onnx.Mul"(%10809, %10816) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10818 = torch.operator "onnx.Slice"(%10801, %10814, %10817, %10803) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.72Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.7/norm/norm/Constant_attr__value" : tensor<3072xbf16> %10819 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.72Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.72Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.7/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %10820 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.72Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> 
!torch.vtensor<[3072],bf16>
// The line above is the result type of the torch_c.from_builtin_tensor (%10820) begun on the preceding line.
// LayerNormalization over the last axis, then modulation: out = LN(x) * (unsqueeze(%10815) + c) + unsqueeze(%10812).
// %10815 and %10812 are slices of the Gemm result %10801 computed earlier in the function.
// NOTE(review): c is a dense_resource bf16 scalar not visible here; presumably 1.0 - confirm.
%10821 = torch.operator "onnx.LayerNormalization"(%10800, %10819, %10820) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10822 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10823 = torch.operator "onnx.Unsqueeze"(%10815, %10822) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constants carry an explicit element type (tensor<bf16> / tensor<si64>) matching their result type.
%10824 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10825 = torch.operator "onnx.Add"(%10823, %10824) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%10826 = torch.operator "onnx.Mul"(%10821, %10825) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10827 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10828 = torch.operator "onnx.Unsqueeze"(%10812, %10827) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%10829 = torch.operator "onnx.Add"(%10826, %10828) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// MLP up-projection 3072 -> 12288 with bias %459.
%10830 = torch.operator "onnx.MatMul"(%10829, %1004) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10831 = torch.operator "onnx.Add"(%459, %10830) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// Activation on x = %10831: c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))).
// NOTE(review): this has the form of the tanh-approximated GELU; c0..c3 are dense_resource scalars not visible here - confirm values.
%10832 = torch.operator "onnx.Mul"(%10831, %10831) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10833 = torch.operator "onnx.Mul"(%10831, %10832) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10834 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10835 = torch.operator "onnx.Mul"(%10834, %10833) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10836 = torch.operator "onnx.Add"(%10831, %10835) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10837 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10838 = torch.operator "onnx.Mul"(%10837, %10836) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10839 = torch.operator "onnx.Tanh"(%10838) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10840 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10841 = torch.operator "onnx.Add"(%10840, %10839) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10842 = torch.operator "onnx.Mul"(%10831, %10841) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%10843 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%10844 = torch.operator "onnx.Mul"(%10843, %10842) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// Attention input: one entry of the shape of %10829, selected by a scalar Gather index.
// NOTE(review): the index value is a dense_resource scalar not visible here.
%10845 = torch.operator "onnx.Shape"(%10829) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%10846 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10847 = torch.operator "onnx.Gather"(%10845, %10846) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three 3072 -> 3072 projections of the same input, each followed by a [3072] bias add.
%10848 = torch.operator "onnx.MatMul"(%10829, %1005) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10849 = torch.operator "onnx.Add"(%463, %10848) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10850 = torch.operator "onnx.MatMul"(%10829, %1006) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10851 = torch.operator "onnx.Add"(%464, %10850) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10852 = torch.operator "onnx.MatMul"(%10829, %1007) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%10853 = torch.operator "onnx.Add"(%465, %10852) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// One shape entry of %10851 divided by a scalar constant; the two Casts to dtype 7 are si64 -> si64.
// NOTE(review): presumably this is inner_dim / num_heads; the operand values are not visible here - confirm.
%10854 = torch.operator "onnx.Shape"(%10851) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%10855 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10856 = torch.operator "onnx.Gather"(%10854, %10855) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10857 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%10858 = torch.operator "onnx.Div"(%10856, %10857) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10859 = torch.operator "onnx.Cast"(%10858) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%10860 = torch.operator "onnx.Cast"(%10859) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// First 4-element reshape target: [%10847, const, const, %10860].
%10861 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15403_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10862 = torch.operator "onnx.Unsqueeze"(%10847, %10861) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10863 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10864 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10865 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15407_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10866 = torch.operator "onnx.Unsqueeze"(%10860, %10865) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%10867 = torch.operator "onnx.Concat"(%10862, %10863, %10864, %10866) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%10868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15410_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%10869 = torch.operator "onnx.Unsqueeze"(%10847, %10868) : (!torch.vtensor<[],si64>,
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10870 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10871 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10872 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15414_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10873 = torch.operator "onnx.Unsqueeze"(%10860, %10872) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10874 = torch.operator "onnx.Concat"(%10869, %10870, %10871, %10873) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10875 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15417_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10876 = torch.operator "onnx.Unsqueeze"(%10847, %10875) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10877 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10878 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10879 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15421_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10880 = torch.operator "onnx.Unsqueeze"(%10860, %10879) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10881 = torch.operator "onnx.Concat"(%10876, %10877, %10878, %10880) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10882 = torch.operator "onnx.Reshape"(%10849, %10867) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10883 = torch.operator "onnx.Transpose"(%10882) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10884 = torch.operator "onnx.Reshape"(%10851, %10874) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10885 = torch.operator "onnx.Transpose"(%10884) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10886 = torch.operator "onnx.Reshape"(%10853, %10881) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10887 = torch.operator "onnx.Transpose"(%10886) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10888 = torch.operator "onnx.Cast"(%10883) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10889 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10890 = torch.operator "onnx.Pow"(%10888, %10889) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10891 = torch.operator "onnx.ReduceMean"(%10890) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10892 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.7_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10893 = torch.operator "onnx.Add"(%10891, %10892) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10894 = torch.operator "onnx.Sqrt"(%10893) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10895 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10896 = torch.operator "onnx.Div"(%10895, %10894) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10897 = torch.operator "onnx.Cast"(%10883) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10898 = torch.operator "onnx.Mul"(%10897, %10896) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10899 = torch.operator "onnx.Cast"(%10898) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %10900 = torch.operator "onnx.Mul"(%10899, %461) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %10901 = torch.operator "onnx.Cast"(%10885) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10902 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10903 = torch.operator "onnx.Pow"(%10901, %10902) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10904 = torch.operator "onnx.ReduceMean"(%10903) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10905 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.7_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10906 = torch.operator "onnx.Add"(%10904, %10905) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10907 = torch.operator "onnx.Sqrt"(%10906) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10908 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %10909 = torch.operator "onnx.Div"(%10908, %10907) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %10910 = torch.operator "onnx.Cast"(%10885) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10911 = torch.operator "onnx.Mul"(%10910, %10909) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %10912 = torch.operator "onnx.Cast"(%10911) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %10913 = torch.operator "onnx.Mul"(%10912, %462) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %10914 = torch.operator "onnx.Shape"(%10900) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10915 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10916 = torch.operator "onnx.Gather"(%10914, %10915) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10917 = torch.operator "onnx.Shape"(%10900) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10918 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_10_attr__value> : tensor} : () -> 
!torch.vtensor<[],si64> %10919 = torch.operator "onnx.Gather"(%10917, %10918) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10920 = torch.operator "onnx.Shape"(%10900) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10921 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10922 = torch.operator "onnx.Gather"(%10920, %10921) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10923 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15465_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10924 = torch.operator "onnx.Unsqueeze"(%10916, %10923) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10925 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15467_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10926 = torch.operator "onnx.Unsqueeze"(%10919, %10925) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10927 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15469_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10928 = torch.operator "onnx.Unsqueeze"(%10922, %10927) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10929 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10930 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10931 = torch.operator "onnx.Concat"(%10924, %10926, %10928, %10929, %10930) {torch.onnx.axis 
= 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %10932 = torch.operator "onnx.Reshape"(%10900, %10931) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10933 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10934:2 = torch.operator "onnx.Split"(%10932, %10933) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10935 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10936 = torch.operator "onnx.Squeeze"(%10934#0, %10935) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10937 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10938 = torch.operator "onnx.Squeeze"(%10934#1, %10937) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10939 = torch.operator "onnx.Neg"(%10938) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10940 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10941 = torch.operator "onnx.Unsqueeze"(%10939, %10940) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10942 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.7_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10943 = torch.operator "onnx.Unsqueeze"(%10936, %10942) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10944 = torch.operator "onnx.Concat"(%10941, %10943) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10945 = torch.operator "onnx.Shape"(%10944) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10946 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10947 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10948 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10949 = torch.operator "onnx.Slice"(%10945, %10947, %10948, %10946) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10950 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10951 = torch.operator "onnx.Concat"(%10949, %10950) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10952 = torch.operator "onnx.Reshape"(%10944, %10951) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10953 = torch.operator "onnx.Cast"(%10900) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> 
!torch.vtensor<[?,?,?,128],f32> %10954 = torch.operator "onnx.Mul"(%10953, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10955 = torch.operator "onnx.Cast"(%10952) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %10956 = torch.operator "onnx.Mul"(%10955, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10957 = torch.operator "onnx.Add"(%10954, %10956) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %10958 = torch.operator "onnx.Cast"(%10957) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %10959 = torch.operator "onnx.Shape"(%10913) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10960 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10961 = torch.operator "onnx.Gather"(%10959, %10960) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10962 = torch.operator "onnx.Shape"(%10913) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10963 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %10964 = torch.operator "onnx.Gather"(%10962, %10963) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10965 = torch.operator "onnx.Shape"(%10913) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %10966 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> 
%10967 = torch.operator "onnx.Gather"(%10965, %10966) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %10968 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15510_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10969 = torch.operator "onnx.Unsqueeze"(%10961, %10968) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10970 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15512_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10971 = torch.operator "onnx.Unsqueeze"(%10964, %10970) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10972 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15514_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10973 = torch.operator "onnx.Unsqueeze"(%10967, %10972) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %10974 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10975 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10976 = torch.operator "onnx.Concat"(%10969, %10971, %10973, %10974, %10975) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %10977 = torch.operator "onnx.Reshape"(%10913, %10976) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %10978 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.7_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %10979:2 = torch.operator "onnx.Split"(%10977, %10978) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %10980 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10981 = torch.operator "onnx.Squeeze"(%10979#0, %10980) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10982 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10983 = torch.operator "onnx.Squeeze"(%10979#1, %10982) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10984 = torch.operator "onnx.Neg"(%10983) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %10985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10986 = torch.operator "onnx.Unsqueeze"(%10984, %10985) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10987 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10988 = torch.operator "onnx.Unsqueeze"(%10981, %10987) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %10989 = torch.operator "onnx.Concat"(%10986, %10988) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> 
!torch.vtensor<[?,?,?,?,2],bf16> %10990 = torch.operator "onnx.Shape"(%10989) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %10991 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10992 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10993 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10994 = torch.operator "onnx.Slice"(%10990, %10992, %10993, %10991) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %10995 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %10996 = torch.operator "onnx.Concat"(%10994, %10995) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %10997 = torch.operator "onnx.Reshape"(%10989, %10996) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %10998 = torch.operator "onnx.Cast"(%10913) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %10999 = torch.operator "onnx.Mul"(%10998, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11000 = torch.operator "onnx.Cast"(%10997) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11001 = torch.operator "onnx.Mul"(%11000, %1456) : (!torch.vtensor<[?,?,?,?],f32>, 
!torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11002 = torch.operator "onnx.Add"(%10999, %11001) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11003 = torch.operator "onnx.Cast"(%11002) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %11004 = torch.operator "onnx.Shape"(%10958) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %11005 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11006 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11007 = torch.operator "onnx.Slice"(%11004, %11005, %11006) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11008 = torch.operator "onnx.Cast"(%11007) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %11009 = torch.operator "onnx.Sqrt"(%11008) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11010 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %11011 = torch.operator "onnx.Cast"(%11009) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %11012 = torch.operator "onnx.Div"(%11010, %11011) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %11013 = torch.operator "onnx.Cast"(%11012) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %11014 = torch.operator "onnx.Transpose"(%11003) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) 
-> !torch.vtensor<[?,?,128,4608],bf16> %11015 = torch.operator "onnx.Sqrt"(%11013) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11016 = torch.operator "onnx.Mul"(%10958, %11015) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %11017 = torch.operator "onnx.Sqrt"(%11013) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11018 = torch.operator "onnx.Mul"(%11014, %11017) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %11019 = torch.operator "onnx.MatMul"(%11016, %11018) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %11020 = torch.operator "onnx.Softmax"(%11019) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %11021 = torch.operator "onnx.MatMul"(%11020, %10887) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %11022 = torch.operator "onnx.Transpose"(%11021) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %11023 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %11024 = torch.operator "onnx.Mul"(%10860, %11023) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11025 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15567_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11026 = torch.operator "onnx.Unsqueeze"(%10847, %11025) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11027 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %11028 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15570_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11029 = torch.operator "onnx.Unsqueeze"(%11024, %11028) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11030 = torch.operator "onnx.Concat"(%11026, %11027, %11029) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11031 = torch.operator "onnx.Reshape"(%11022, %11030) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %11032 = torch.operator "onnx.Cast"(%11031) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %11033 = torch.operator "onnx.Concat"(%11032, %10844) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %11034 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.7_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11035 = torch.operator "onnx.Unsqueeze"(%10818, %11034) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %11036 = torch.operator "onnx.MatMul"(%11033, %1008) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11037 = torch.operator "onnx.Add"(%460, %11036) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11038 = torch.operator "onnx.Mul"(%11035, %11037) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11039 = torch.operator "onnx.Add"(%10800, %11038) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> 
%11040 = torch.operator "onnx.Gemm"(%1285, %466, %467) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %11041 = torch.operator "onnx.Shape"(%11040) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %11042 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11043 = torch.operator "onnx.Gather"(%11041, %11042) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11044 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11045 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11046 = torch.operator "onnx.Add"(%11043, %11045) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11047 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11048 = torch.operator "onnx.Div"(%11046, %11047) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11049 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11050 = torch.operator "onnx.Mul"(%11048, %11049) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11051 = torch.operator "onnx.Slice"(%11040, %11044, %11050, %11042) : 
(!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %11052 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11053 = torch.operator "onnx.Mul"(%11048, %11052) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11054 = torch.operator "onnx.Slice"(%11040, %11050, %11053, %11042) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %11055 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11056 = torch.operator "onnx.Mul"(%11048, %11055) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11057 = torch.operator "onnx.Slice"(%11040, %11053, %11056, %11042) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.82Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.8/norm/norm/Constant_attr__value" : tensor<3072xbf16> %11058 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.82Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.82Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.8/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %11059 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.82Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %11060 = torch.operator "onnx.LayerNormalization"(%11039, %11058, %11059) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} 
: (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11061 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11062 = torch.operator "onnx.Unsqueeze"(%11054, %11061) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %11063 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %11064 = torch.operator "onnx.Add"(%11062, %11063) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %11065 = torch.operator "onnx.Mul"(%11060, %11064) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11066 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11067 = torch.operator "onnx.Unsqueeze"(%11051, %11066) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %11068 = torch.operator "onnx.Add"(%11065, %11067) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11069 = torch.operator "onnx.MatMul"(%11068, %1009) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11070 = torch.operator "onnx.Add"(%468, %11069) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11071 = torch.operator "onnx.Mul"(%11070, %11070) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11072 = torch.operator "onnx.Mul"(%11070, %11071) : 
// single_transformer_blocks.8 act_mlp, with x = %11070: c3 * (x * (c2 + Tanh(c1 * (x + c0 * x^3)))) -> %11083. This has the shape of a tanh-approximation GELU; the scalar values live in dense_resource blobs and are not visible here (TODO confirm). The line ends by starting Shape(%11068).
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11073 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %11074 = torch.operator "onnx.Mul"(%11073, %11072) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11075 = torch.operator "onnx.Add"(%11070, %11074) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11076 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %11077 = torch.operator "onnx.Mul"(%11076, %11075) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11078 = torch.operator "onnx.Tanh"(%11077) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11079 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %11080 = torch.operator "onnx.Add"(%11079, %11078) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11081 = torch.operator "onnx.Mul"(%11070, %11080) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11082 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %11083 = torch.operator "onnx.Mul"(%11082, %11081) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11084 = torch.operator "onnx.Shape"(%11068) : (!torch.vtensor<[?,4608,3072],bf16>) -> 
// single_transformer_blocks.8 attn: Gather of one dim of Shape(%11068) -> %11086; three MatMul+Add projections of %11068 (weights %1010/%1011/%1012, biases %472/%473/%474) -> %11088/%11090/%11092; then Gather on Shape(%11090) and the start of a Div by a scalar constant.
!torch.vtensor<[3],si64> %11085 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11086 = torch.operator "onnx.Gather"(%11084, %11085) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11087 = torch.operator "onnx.MatMul"(%11068, %1010) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11088 = torch.operator "onnx.Add"(%472, %11087) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11089 = torch.operator "onnx.MatMul"(%11068, %1011) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11090 = torch.operator "onnx.Add"(%473, %11089) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11091 = torch.operator "onnx.MatMul"(%11068, %1012) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11092 = torch.operator "onnx.Add"(%474, %11091) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11093 = torch.operator "onnx.Shape"(%11090) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %11094 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11095 = torch.operator "onnx.Gather"(%11093, %11094) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11096 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11097 = torch.operator "onnx.Div"(%11095, 
%11096) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11098 = torch.operator "onnx.Cast"(%11097) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11099 = torch.operator "onnx.Cast"(%11098) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11100 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15642_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11101 = torch.operator "onnx.Unsqueeze"(%11086, %11100) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11102 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11103 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11104 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15646_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11105 = torch.operator "onnx.Unsqueeze"(%11099, %11104) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11106 = torch.operator "onnx.Concat"(%11101, %11102, %11103, %11105) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11107 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15649_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11108 = torch.operator "onnx.Unsqueeze"(%11086, %11107) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11109 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %11110 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11111 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15653_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11112 = torch.operator "onnx.Unsqueeze"(%11099, %11111) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11113 = torch.operator "onnx.Concat"(%11108, %11109, %11110, %11112) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11114 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15656_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11115 = torch.operator "onnx.Unsqueeze"(%11086, %11114) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11116 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11117 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11118 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15660_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11119 = torch.operator "onnx.Unsqueeze"(%11099, %11118) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11120 = torch.operator "onnx.Concat"(%11115, %11116, %11117, %11119) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11121 = torch.operator "onnx.Reshape"(%11088, %11106) 
// Reshape to rank 4 and Transpose (perm 0,2,1,3) of %11088/%11090/%11092 -> %11122/%11124/%11126; then attn.norm_q begins on %11122 in f32: Cast -> Pow(scalar const) -> ReduceMean(axis -1, keepdims) -> Add(scalar const).
{torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11122 = torch.operator "onnx.Transpose"(%11121) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11123 = torch.operator "onnx.Reshape"(%11090, %11113) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11124 = torch.operator "onnx.Transpose"(%11123) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11125 = torch.operator "onnx.Reshape"(%11092, %11120) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11126 = torch.operator "onnx.Transpose"(%11125) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11127 = torch.operator "onnx.Cast"(%11122) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11128 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11129 = torch.operator "onnx.Pow"(%11127, %11128) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %11130 = torch.operator "onnx.ReduceMean"(%11129) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11131 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11132 = torch.operator "onnx.Add"(%11130, %11131) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> 
// attn.norm_q finishes: scalar const / Sqrt(%11132), Mul with the f32 cast of %11122, Cast back to bf16, Mul by %470 -> %11139. attn.norm_k then repeats the same Cast/Pow/ReduceMean/Add sequence on %11124.
!torch.vtensor<[?,?,?,1],f32> %11133 = torch.operator "onnx.Sqrt"(%11132) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11134 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11135 = torch.operator "onnx.Div"(%11134, %11133) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11136 = torch.operator "onnx.Cast"(%11122) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11137 = torch.operator "onnx.Mul"(%11136, %11135) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %11138 = torch.operator "onnx.Cast"(%11137) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %11139 = torch.operator "onnx.Mul"(%11138, %470) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %11140 = torch.operator "onnx.Cast"(%11124) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11141 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11142 = torch.operator "onnx.Pow"(%11140, %11141) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %11143 = torch.operator "onnx.ReduceMean"(%11142) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11145 = torch.operator "onnx.Add"(%11143, %11144) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> 
// attn.norm_k finishes: scalar const / Sqrt(%11145), Mul with the f32 cast of %11124, Cast back to bf16, Mul by %471 -> %11152. Then repeated Shape/Gather on %11139 pick out individual dims (%11155, %11158, ...) for a later reshape.
!torch.vtensor<[?,?,?,1],f32> %11146 = torch.operator "onnx.Sqrt"(%11145) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11147 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11148 = torch.operator "onnx.Div"(%11147, %11146) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11149 = torch.operator "onnx.Cast"(%11124) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11150 = torch.operator "onnx.Mul"(%11149, %11148) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %11151 = torch.operator "onnx.Cast"(%11150) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %11152 = torch.operator "onnx.Mul"(%11151, %471) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %11153 = torch.operator "onnx.Shape"(%11139) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11154 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11155 = torch.operator "onnx.Gather"(%11153, %11154) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11156 = torch.operator "onnx.Shape"(%11139) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11157 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11158 = torch.operator "onnx.Gather"(%11156, %11157) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11159 = torch.operator "onnx.Shape"(%11139) : 
// Builds the 5-element shape vector %11170 by concatenating three gathered dims of %11139 (%11155, %11158, %11161, each unsqueezed to rank 1) with two rank-1 constants, then starts the Reshape of %11139 with it.
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11160 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11161 = torch.operator "onnx.Gather"(%11159, %11160) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11162 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15704_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11163 = torch.operator "onnx.Unsqueeze"(%11155, %11162) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11164 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15706_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11165 = torch.operator "onnx.Unsqueeze"(%11158, %11164) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11166 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15708_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11167 = torch.operator "onnx.Unsqueeze"(%11161, %11166) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11168 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11169 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11170 = torch.operator "onnx.Concat"(%11163, %11165, %11167, %11168, %11169) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %11171 = torch.operator "onnx.Reshape"(%11139, %11170) 
{torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11172 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %11173:2 = torch.operator "onnx.Split"(%11171, %11172) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %11174 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11175 = torch.operator "onnx.Squeeze"(%11173#0, %11174) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11176 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11177 = torch.operator "onnx.Squeeze"(%11173#1, %11176) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11178 = torch.operator "onnx.Neg"(%11177) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11180 = torch.operator "onnx.Unsqueeze"(%11178, %11179) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11181 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11182 = torch.operator "onnx.Unsqueeze"(%11175, %11181) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?,1],bf16> %11183 = torch.operator "onnx.Concat"(%11180, %11182) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11184 = torch.operator "onnx.Shape"(%11183) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %11185 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11186 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11187 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11188 = torch.operator "onnx.Slice"(%11184, %11186, %11187, %11185) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11189 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11190 = torch.operator "onnx.Concat"(%11188, %11189) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11191 = torch.operator "onnx.Reshape"(%11183, %11190) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11192 = torch.operator "onnx.Cast"(%11139) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %11193 = torch.operator "onnx.Mul"(%11192, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11194 = torch.operator "onnx.Cast"(%11191) 
// In f32: %11193 + (cast(%11191) * %1456) -> %11196, cast back to bf16 as %11197 (%1455/%1456 are [1,1,4608,128] f32 tables defined outside this chunk; presumably rotary cos/sin, TODO confirm). Then repeated Shape/Gather on %11152 pick out dims for the same treatment of that tensor.
{torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11195 = torch.operator "onnx.Mul"(%11194, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11196 = torch.operator "onnx.Add"(%11193, %11195) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11197 = torch.operator "onnx.Cast"(%11196) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %11198 = torch.operator "onnx.Shape"(%11152) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11199 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11200 = torch.operator "onnx.Gather"(%11198, %11199) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11201 = torch.operator "onnx.Shape"(%11152) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11202 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11203 = torch.operator "onnx.Gather"(%11201, %11202) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11204 = torch.operator "onnx.Shape"(%11152) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11205 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11206 = torch.operator "onnx.Gather"(%11204, %11205) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11207 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_15749_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11208 = torch.operator "onnx.Unsqueeze"(%11200, %11207) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11209 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15751_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11210 = torch.operator "onnx.Unsqueeze"(%11203, %11209) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11211 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15753_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11212 = torch.operator "onnx.Unsqueeze"(%11206, %11211) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11213 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11214 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11215 = torch.operator "onnx.Concat"(%11208, %11210, %11212, %11213, %11214) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %11216 = torch.operator "onnx.Reshape"(%11152, %11215) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %11218:2 = torch.operator "onnx.Split"(%11216, %11217) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> 
(!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %11219 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11220 = torch.operator "onnx.Squeeze"(%11218#0, %11219) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11221 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11222 = torch.operator "onnx.Squeeze"(%11218#1, %11221) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11223 = torch.operator "onnx.Neg"(%11222) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11225 = torch.operator "onnx.Unsqueeze"(%11223, %11224) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11226 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11227 = torch.operator "onnx.Unsqueeze"(%11220, %11226) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11228 = torch.operator "onnx.Concat"(%11225, %11227) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11229 = torch.operator "onnx.Shape"(%11228) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %11230 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_33_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11231 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11233 = torch.operator "onnx.Slice"(%11229, %11231, %11232, %11230) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11234 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11235 = torch.operator "onnx.Concat"(%11233, %11234) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11236 = torch.operator "onnx.Reshape"(%11228, %11235) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11237 = torch.operator "onnx.Cast"(%11152) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %11238 = torch.operator "onnx.Mul"(%11237, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11239 = torch.operator "onnx.Cast"(%11236) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11240 = torch.operator "onnx.Mul"(%11239, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11241 = torch.operator "onnx.Add"(%11238, %11240) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11242 = torch.operator "onnx.Cast"(%11241) {torch.onnx.to = 16 : 
si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %11243 = torch.operator "onnx.Shape"(%11197) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %11244 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11245 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11246 = torch.operator "onnx.Slice"(%11243, %11244, %11245) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11247 = torch.operator "onnx.Cast"(%11246) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %11248 = torch.operator "onnx.Sqrt"(%11247) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11249 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %11250 = torch.operator "onnx.Cast"(%11248) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %11251 = torch.operator "onnx.Div"(%11249, %11250) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %11252 = torch.operator "onnx.Cast"(%11251) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %11253 = torch.operator "onnx.Transpose"(%11242) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %11254 = torch.operator "onnx.Sqrt"(%11252) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11255 = torch.operator "onnx.Mul"(%11197, %11254) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %11256 = 
// Attention core: both operands are scaled by Sqrt(%11252), then MatMul(%11255, %11257) -> Softmax(axis -1) -> MatMul with %11126 -> Transpose (perm 0,2,1,3) -> %11261. The remaining ops assemble pieces of the shape vector for the Reshape that follows (%11099 * scalar const, unsqueezed %11086).
torch.operator "onnx.Sqrt"(%11252) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11257 = torch.operator "onnx.Mul"(%11253, %11256) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %11258 = torch.operator "onnx.MatMul"(%11255, %11257) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %11259 = torch.operator "onnx.Softmax"(%11258) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %11260 = torch.operator "onnx.MatMul"(%11259, %11126) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %11261 = torch.operator "onnx.Transpose"(%11260) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %11262 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11263 = torch.operator "onnx.Mul"(%11099, %11262) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15806_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11265 = torch.operator "onnx.Unsqueeze"(%11086, %11264) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11266 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11267 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15809_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11268 = torch.operator "onnx.Unsqueeze"(%11263, %11267) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %11269 = torch.operator "onnx.Concat"(%11265, %11266, %11268) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11270 = torch.operator "onnx.Reshape"(%11261, %11269) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %11271 = torch.operator "onnx.Cast"(%11270) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %11272 = torch.operator "onnx.Concat"(%11271, %11083) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %11273 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.8_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11274 = torch.operator "onnx.Unsqueeze"(%11057, %11273) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %11275 = torch.operator "onnx.MatMul"(%11272, %1013) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11276 = torch.operator "onnx.Add"(%469, %11275) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11277 = torch.operator "onnx.Mul"(%11274, %11276) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11278 = torch.operator "onnx.Add"(%11039, %11277) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11279 = torch.operator "onnx.Gemm"(%1285, %475, %476) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %11280 
= torch.operator "onnx.Shape"(%11279) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %11281 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11282 = torch.operator "onnx.Gather"(%11280, %11281) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11283 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11284 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11285 = torch.operator "onnx.Add"(%11282, %11284) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11286 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11287 = torch.operator "onnx.Div"(%11285, %11286) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11288 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11289 = torch.operator "onnx.Mul"(%11287, %11288) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11290 = torch.operator "onnx.Slice"(%11279, %11283, %11289, %11281) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %11291 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%11292 = torch.operator "onnx.Mul"(%11287, %11291) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11293 = torch.operator "onnx.Slice"(%11279, %11289, %11292, %11281) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %11294 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11295 = torch.operator "onnx.Mul"(%11287, %11294) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11296 = torch.operator "onnx.Slice"(%11279, %11292, %11295, %11281) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.92Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.9/norm/norm/Constant_attr__value" : tensor<3072xbf16> %11297 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.92Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.92Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.9/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %11298 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.92Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %11299 = torch.operator "onnx.LayerNormalization"(%11278, %11297, %11298) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11300 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
// ---- single_transformer_blocks.9: modulate the normalized hidden state ----
// %11293 and %11290 are Slice results of the Gemm output %11279. Each is unsqueezed to
// [?,1,?] so that it broadcasts against the LayerNormalization output %11299.
%11301 = torch.operator "onnx.Unsqueeze"(%11293, %11300) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 constants are typed `tensor<elem>` so the attribute type agrees with the op's
// `!torch.vtensor<[],elem>` result; a bare `tensor` is not a valid builtin tensor type.
// NOTE(review): the constant values live in dense resources that are not visible here.
%11302 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11303 = torch.operator "onnx.Add"(%11301, %11302) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%11304 = torch.operator "onnx.Mul"(%11299, %11303) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11305 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11306 = torch.operator "onnx.Unsqueeze"(%11290, %11305) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// %11307 is the modulated activation consumed by the MLP branch and by all three projections below.
%11307 = torch.operator "onnx.Add"(%11304, %11306) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// ---- MLP branch: 3072 -> 12288 projection followed by a tanh-based activation ----
%11308 = torch.operator "onnx.MatMul"(%11307, %1014) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11309 = torch.operator "onnx.Add"(%477, %11308) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// With x = %11309 the ops below compute c3 * (x * (c2 + tanh(c1 * (x + c0 * x^3)))).
// NOTE(review): this has the shape of the tanh GELU approximation; confirm c0..c3 against the resources.
%11310 = torch.operator "onnx.Mul"(%11309, %11309) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11311 = torch.operator "onnx.Mul"(%11309, %11310) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11312 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11313 = torch.operator "onnx.Mul"(%11312, %11311) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11314 = torch.operator "onnx.Add"(%11309, %11313) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11315 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11316 = torch.operator "onnx.Mul"(%11315, %11314) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11317 = torch.operator "onnx.Tanh"(%11316) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11318 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11319 = torch.operator "onnx.Add"(%11318, %11317) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11320 = torch.operator "onnx.Mul"(%11309, %11319) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11322 = torch.operator "onnx.Mul"(%11321, %11320) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>

// ---- Attention branch: one dimension of %11307, gathered as a scalar for the reshape targets ----
%11323 = torch.operator "onnx.Shape"(%11307) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%11324 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11325 = torch.operator "onnx.Gather"(%11323, %11324) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// Three 3072 -> 3072 projections of %11307, each followed by a bias add.
// NOTE(review): the resource names used downstream (attn_norm_q / attn_norm_k) suggest %11327 is
// the query path and %11329 the key path, leaving %11331 as the value path; confirm.
%11326 = torch.operator "onnx.MatMul"(%11307, %1015) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11327 = torch.operator "onnx.Add"(%481, %11326) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11328 = torch.operator "onnx.MatMul"(%11307, %1016) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11329 = torch.operator "onnx.Add"(%482, %11328) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11330 = torch.operator "onnx.MatMul"(%11307, %1017) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11331 = torch.operator "onnx.Add"(%483, %11330) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// A gathered dimension of %11329 divided by a constant.
// NOTE(review): presumably inner_dim / num_heads, i.e. the per-head size; confirm against the resources.
%11332 = torch.operator "onnx.Shape"(%11329) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%11333 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11334 = torch.operator "onnx.Gather"(%11332, %11333) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11335 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11336 = torch.operator "onnx.Div"(%11334, %11335) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Both casts map si64 to si64 (ONNX `to = 7`); they are kept as emitted so SSA numbering is unchanged.
%11337 = torch.operator "onnx.Cast"(%11336) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// The result type of %11338 continues on the following line of the file.
%11338 = torch.operator "onnx.Cast"(%11337) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) ->
!torch.vtensor<[],si64> %11339 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15881_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11340 = torch.operator "onnx.Unsqueeze"(%11325, %11339) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11341 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11342 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11343 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15885_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11344 = torch.operator "onnx.Unsqueeze"(%11338, %11343) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11345 = torch.operator "onnx.Concat"(%11340, %11341, %11342, %11344) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11346 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15888_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11347 = torch.operator "onnx.Unsqueeze"(%11325, %11346) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11348 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11349 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11350 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15892_attr__value> 
: tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11351 = torch.operator "onnx.Unsqueeze"(%11338, %11350) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11352 = torch.operator "onnx.Concat"(%11347, %11348, %11349, %11351) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11353 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15895_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11354 = torch.operator "onnx.Unsqueeze"(%11325, %11353) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11355 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11356 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11357 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15899_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11358 = torch.operator "onnx.Unsqueeze"(%11338, %11357) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11359 = torch.operator "onnx.Concat"(%11354, %11355, %11356, %11358) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11360 = torch.operator "onnx.Reshape"(%11327, %11345) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11361 = torch.operator "onnx.Transpose"(%11360) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11362 = 
// (This first op is the right-hand side of the `%11362 =` definition begun on the previous line.)
// Reshape to rank 4 using the shape vector %11352, then swap axes 1 and 2.
torch.operator "onnx.Reshape"(%11329, %11352) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%11363 = torch.operator "onnx.Transpose"(%11362) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%11364 = torch.operator "onnx.Reshape"(%11331, %11359) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%11365 = torch.operator "onnx.Transpose"(%11364) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>

// ---- Normalization of %11361 in f32: x * (c2 / sqrt(mean(x ^ c0, axis = -1) + c1)), then * %479 ----
// Rank-0 constants are typed `tensor<elem>` so the attribute type agrees with the op's
// `!torch.vtensor<[],elem>` result; a bare `tensor` is not a valid builtin tensor type.
// NOTE(review): this has the shape of an RMS norm (c0 = 2, c1 = eps, c2 = 1); the constant values
// live in dense resources that are not visible here, so confirm.
%11366 = torch.operator "onnx.Cast"(%11361) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11367 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11368 = torch.operator "onnx.Pow"(%11366, %11367) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%11369 = torch.operator "onnx.ReduceMean"(%11368) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11370 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11371 = torch.operator "onnx.Add"(%11369, %11370) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11372 = torch.operator "onnx.Sqrt"(%11371) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11373 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11374 = torch.operator "onnx.Div"(%11373, %11372) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// %11375 repeats the cast already computed as %11366; kept as emitted so SSA numbering is unchanged.
%11375 = torch.operator "onnx.Cast"(%11361) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11376 = torch.operator "onnx.Mul"(%11375, %11374) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%11377 = torch.operator "onnx.Cast"(%11376) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
// Multiplying by the [128] vector %479 pins the last dimension of the result to 128.
%11378 = torch.operator "onnx.Mul"(%11377, %479) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>

// ---- Same normalization applied to %11363, scaled by %480 ----
%11379 = torch.operator "onnx.Cast"(%11363) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11380 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11381 = torch.operator "onnx.Pow"(%11379, %11380) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%11382 = torch.operator "onnx.ReduceMean"(%11381) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11383 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11384 = torch.operator "onnx.Add"(%11382, %11383) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11385 = torch.operator "onnx.Sqrt"(%11384) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11386 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11387 = torch.operator "onnx.Div"(%11386, %11385) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// %11388 repeats the cast already computed as %11379; kept as emitted.
%11388 = torch.operator "onnx.Cast"(%11363) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11389 = torch.operator "onnx.Mul"(%11388, %11387) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%11390 = torch.operator "onnx.Cast"(%11389) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%11391 = torch.operator "onnx.Mul"(%11390, %480) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>

// ---- Three scalar dimensions of %11378, gathered for the rank-5 reshape that follows ----
// NOTE(review): the gather indices live in dense resources; presumably dims 0, 1 and 2.
%11392 = torch.operator "onnx.Shape"(%11378) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11393 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11394 = torch.operator "onnx.Gather"(%11392, %11393) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11395 = torch.operator "onnx.Shape"(%11378) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11396 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11397 = torch.operator "onnx.Gather"(%11395, %11396) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11398 = torch.operator "onnx.Shape"(%11378) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11399 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
// The operand and result types of %11400 continue on the following line of the file.
%11400 = torch.operator "onnx.Gather"(%11398, %11399) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>,
!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11401 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15943_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11402 = torch.operator "onnx.Unsqueeze"(%11394, %11401) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11403 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15945_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11404 = torch.operator "onnx.Unsqueeze"(%11397, %11403) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11405 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15947_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11406 = torch.operator "onnx.Unsqueeze"(%11400, %11405) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11407 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11408 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11409 = torch.operator "onnx.Concat"(%11402, %11404, %11406, %11407, %11408) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %11410 = torch.operator "onnx.Reshape"(%11378, %11409) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11411 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %11412:2 = torch.operator 
"onnx.Split"(%11410, %11411) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %11413 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11414 = torch.operator "onnx.Squeeze"(%11412#0, %11413) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11415 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11416 = torch.operator "onnx.Squeeze"(%11412#1, %11415) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11417 = torch.operator "onnx.Neg"(%11416) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11418 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11419 = torch.operator "onnx.Unsqueeze"(%11417, %11418) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11420 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11421 = torch.operator "onnx.Unsqueeze"(%11414, %11420) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11422 = torch.operator "onnx.Concat"(%11419, %11421) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11423 = torch.operator "onnx.Shape"(%11422) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %11424 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11425 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11426 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11427 = torch.operator "onnx.Slice"(%11423, %11425, %11426, %11424) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11428 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11429 = torch.operator "onnx.Concat"(%11427, %11428) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11430 = torch.operator "onnx.Reshape"(%11422, %11429) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11431 = torch.operator "onnx.Cast"(%11378) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %11432 = torch.operator "onnx.Mul"(%11431, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11433 = torch.operator "onnx.Cast"(%11430) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11434 = torch.operator "onnx.Mul"(%11433, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11435 = torch.operator "onnx.Add"(%11432, %11434) : (!torch.vtensor<[?,?,4608,128],f32>, 
// (This first line is the type tail of the `%11435` Add begun on the previous line.)
!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
// Cast the f32 sum %11435 back to bf16 (ONNX `to = 16`).
%11436 = torch.operator "onnx.Cast"(%11435) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>

// ---- Three scalar dimensions of %11391, gathered for the rank-5 reshape that follows ----
// Rank-0 constants are typed `tensor<si64>` so the attribute type agrees with the op's
// `!torch.vtensor<[],si64>` result; a bare `tensor` is not a valid builtin tensor type.
// NOTE(review): the gather indices live in dense resources; presumably dims 0, 1 and 2.
%11437 = torch.operator "onnx.Shape"(%11391) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11438 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11439 = torch.operator "onnx.Gather"(%11437, %11438) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11440 = torch.operator "onnx.Shape"(%11391) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11441 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11442 = torch.operator "onnx.Gather"(%11440, %11441) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11443 = torch.operator "onnx.Shape"(%11391) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11444 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11445 = torch.operator "onnx.Gather"(%11443, %11444) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// Promote the gathered scalar to a 1-element vector.
%11446 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15988_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11447 = torch.operator "onnx.Unsqueeze"(%11439, %11446) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// The attribute type and result type of %11448 continue on the following line of the file.
%11448 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15990_attr__value> :
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11449 = torch.operator "onnx.Unsqueeze"(%11442, %11448) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11450 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_15992_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11451 = torch.operator "onnx.Unsqueeze"(%11445, %11450) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11452 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11453 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11454 = torch.operator "onnx.Concat"(%11447, %11449, %11451, %11452, %11453) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %11455 = torch.operator "onnx.Reshape"(%11391, %11454) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11456 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %11457:2 = torch.operator "onnx.Split"(%11455, %11456) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %11458 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11459 = torch.operator "onnx.Squeeze"(%11457#0, %11458) : 
(!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11460 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11461 = torch.operator "onnx.Squeeze"(%11457#1, %11460) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11462 = torch.operator "onnx.Neg"(%11461) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11463 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11464 = torch.operator "onnx.Unsqueeze"(%11462, %11463) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11465 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11466 = torch.operator "onnx.Unsqueeze"(%11459, %11465) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11467 = torch.operator "onnx.Concat"(%11464, %11466) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11468 = torch.operator "onnx.Shape"(%11467) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %11469 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11470 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11471 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.9_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11472 = torch.operator "onnx.Slice"(%11468, %11470, %11471, %11469) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11473 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11474 = torch.operator "onnx.Concat"(%11472, %11473) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11475 = torch.operator "onnx.Reshape"(%11467, %11474) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11476 = torch.operator "onnx.Cast"(%11391) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %11477 = torch.operator "onnx.Mul"(%11476, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11478 = torch.operator "onnx.Cast"(%11475) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11479 = torch.operator "onnx.Mul"(%11478, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11480 = torch.operator "onnx.Add"(%11477, %11479) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11481 = torch.operator "onnx.Cast"(%11480) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %11482 = torch.operator "onnx.Shape"(%11436) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %11483 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.9_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11484 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11485 = torch.operator "onnx.Slice"(%11482, %11483, %11484) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11486 = torch.operator "onnx.Cast"(%11485) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %11487 = torch.operator "onnx.Sqrt"(%11486) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11488 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %11489 = torch.operator "onnx.Cast"(%11487) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %11490 = torch.operator "onnx.Div"(%11488, %11489) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %11491 = torch.operator "onnx.Cast"(%11490) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %11492 = torch.operator "onnx.Transpose"(%11481) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %11493 = torch.operator "onnx.Sqrt"(%11491) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11494 = torch.operator "onnx.Mul"(%11436, %11493) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %11495 = torch.operator "onnx.Sqrt"(%11491) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11496 = torch.operator "onnx.Mul"(%11492, %11495) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %11497 = 
torch.operator "onnx.MatMul"(%11494, %11496) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %11498 = torch.operator "onnx.Softmax"(%11497) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %11499 = torch.operator "onnx.MatMul"(%11498, %11365) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %11500 = torch.operator "onnx.Transpose"(%11499) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// %11501 below is a rank-0 si64 constant: its resource type is spelled tensor<si64> so that it agrees with the !torch.vtensor<[],si64> result type (a bare `tensor` carries no element type and is not a valid builtin tensor type).
%11501 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11502 = torch.operator "onnx.Mul"(%11338, %11501) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11503 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16045_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11504 = torch.operator "onnx.Unsqueeze"(%11325, %11503) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11505 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11506 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16048_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11507 = torch.operator "onnx.Unsqueeze"(%11502, %11506) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11508 = torch.operator "onnx.Concat"(%11504, %11505, %11507) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11509 = torch.operator 
"onnx.Reshape"(%11500, %11508) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %11510 = torch.operator "onnx.Cast"(%11509) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %11511 = torch.operator "onnx.Concat"(%11510, %11322) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %11512 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.9_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11513 = torch.operator "onnx.Unsqueeze"(%11296, %11512) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %11514 = torch.operator "onnx.MatMul"(%11511, %1018) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11515 = torch.operator "onnx.Add"(%478, %11514) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11516 = torch.operator "onnx.Mul"(%11513, %11515) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11517 = torch.operator "onnx.Add"(%11278, %11516) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11518 = torch.operator "onnx.Gemm"(%1285, %484, %485) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %11519 = torch.operator "onnx.Shape"(%11518) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %11520 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11521 = torch.operator "onnx.Gather"(%11519, %11520) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11522 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11523 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11524 = torch.operator "onnx.Add"(%11521, %11523) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11525 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11526 = torch.operator "onnx.Div"(%11524, %11525) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11527 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11528 = torch.operator "onnx.Mul"(%11526, %11527) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11529 = torch.operator "onnx.Slice"(%11518, %11522, %11528, %11520) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %11530 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11531 = torch.operator "onnx.Mul"(%11526, %11530) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11532 = torch.operator "onnx.Slice"(%11518, %11528, %11531, %11520) : 
(!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %11533 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11534 = torch.operator "onnx.Mul"(%11526, %11533) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11535 = torch.operator "onnx.Slice"(%11518, %11531, %11534, %11520) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.102Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.10/norm/norm/Constant_attr__value" : tensor<3072xbf16> %11536 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.102Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.102Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.10/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %11537 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.102Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %11538 = torch.operator "onnx.LayerNormalization"(%11517, %11536, %11537) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11539 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11540 = torch.operator "onnx.Unsqueeze"(%11532, %11539) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %11541 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
// The constant completed just above (%11541) and %11551 further down are rank-0 bf16 constants: their resource type is spelled tensor<bf16> so that it agrees with the !torch.vtensor<[],bf16> result type (a bare `tensor` carries no element type and is not a valid builtin tensor type).
%11542 = torch.operator "onnx.Add"(%11540, %11541) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %11543 = torch.operator "onnx.Mul"(%11538, %11542) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11544 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11545 = torch.operator "onnx.Unsqueeze"(%11529, %11544) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %11546 = torch.operator "onnx.Add"(%11543, %11545) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11547 = torch.operator "onnx.MatMul"(%11546, %1019) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11548 = torch.operator "onnx.Add"(%486, %11547) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11549 = torch.operator "onnx.Mul"(%11548, %11548) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11550 = torch.operator "onnx.Mul"(%11548, %11549) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11551 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %11552 = torch.operator "onnx.Mul"(%11551, %11550) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11553 = torch.operator "onnx.Add"(%11548, %11552) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// Rank-0 constants below carry an explicit element type matching their result type: %11554, %11557, %11560 are tensor<bf16> (results !torch.vtensor<[],bf16>) and %11563 is tensor<si64> (result !torch.vtensor<[],si64>). A bare `tensor` carries no element type and is not a valid builtin tensor type.
%11554 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %11555 = torch.operator "onnx.Mul"(%11554, %11553) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11556 = torch.operator "onnx.Tanh"(%11555) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11557 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %11558 = torch.operator "onnx.Add"(%11557, %11556) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11559 = torch.operator "onnx.Mul"(%11548, %11558) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11560 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16> %11561 = torch.operator "onnx.Mul"(%11560, %11559) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %11562 = torch.operator "onnx.Shape"(%11546) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %11563 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11564 = torch.operator "onnx.Gather"(%11562, %11563) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11565 = torch.operator "onnx.MatMul"(%11546, %1020) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %11566 = torch.operator "onnx.Add"(%490, %11565) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11567 = torch.operator "onnx.MatMul"(%11546, %1021) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11568 = torch.operator "onnx.Add"(%491, %11567) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11569 = torch.operator "onnx.MatMul"(%11546, %1022) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11570 = torch.operator "onnx.Add"(%492, %11569) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11571 = torch.operator "onnx.Shape"(%11568) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
// %11572 and %11574 below are rank-0 si64 constants: their resource type is spelled tensor<si64> so that it agrees with the !torch.vtensor<[],si64> result type (a bare `tensor` carries no element type and is not a valid builtin tensor type).
%11572 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11573 = torch.operator "onnx.Gather"(%11571, %11572) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11574 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11575 = torch.operator "onnx.Div"(%11573, %11574) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11576 = torch.operator "onnx.Cast"(%11575) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11577 = torch.operator "onnx.Cast"(%11576) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11578 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16120_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %11579 = torch.operator "onnx.Unsqueeze"(%11564, %11578) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11580 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11581 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11582 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16124_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11583 = torch.operator "onnx.Unsqueeze"(%11577, %11582) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11584 = torch.operator "onnx.Concat"(%11579, %11580, %11581, %11583) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11585 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16127_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11586 = torch.operator "onnx.Unsqueeze"(%11564, %11585) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11587 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11588 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11589 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16131_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11590 = torch.operator "onnx.Unsqueeze"(%11577, %11589) : (!torch.vtensor<[],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11591 = torch.operator "onnx.Concat"(%11586, %11587, %11588, %11590) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11592 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16134_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11593 = torch.operator "onnx.Unsqueeze"(%11564, %11592) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11594 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11595 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11596 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16138_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11597 = torch.operator "onnx.Unsqueeze"(%11577, %11596) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11598 = torch.operator "onnx.Concat"(%11593, %11594, %11595, %11597) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11599 = torch.operator "onnx.Reshape"(%11566, %11584) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11600 = torch.operator "onnx.Transpose"(%11599) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11601 = torch.operator "onnx.Reshape"(%11568, %11591) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, 
!torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11602 = torch.operator "onnx.Transpose"(%11601) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11603 = torch.operator "onnx.Reshape"(%11570, %11598) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11604 = torch.operator "onnx.Transpose"(%11603) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11605 = torch.operator "onnx.Cast"(%11600) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// %11606, %11609 and %11612 below are rank-0 f32 constants: their resource type is spelled tensor<f32> so that it agrees with the !torch.vtensor<[],f32> result type (a bare `tensor` carries no element type and is not a valid builtin tensor type).
%11606 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11607 = torch.operator "onnx.Pow"(%11605, %11606) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %11608 = torch.operator "onnx.ReduceMean"(%11607) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11609 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11610 = torch.operator "onnx.Add"(%11608, %11609) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11611 = torch.operator "onnx.Sqrt"(%11610) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11612 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11613 = torch.operator "onnx.Div"(%11612, %11611) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> 
!torch.vtensor<[?,?,?,1],f32> %11614 = torch.operator "onnx.Cast"(%11600) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11615 = torch.operator "onnx.Mul"(%11614, %11613) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %11616 = torch.operator "onnx.Cast"(%11615) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %11617 = torch.operator "onnx.Mul"(%11616, %488) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %11618 = torch.operator "onnx.Cast"(%11602) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// %11619, %11622 and %11625 below are rank-0 f32 constants: their resource type is spelled tensor<f32> so that it agrees with the !torch.vtensor<[],f32> result type (a bare `tensor` carries no element type and is not a valid builtin tensor type).
%11619 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11620 = torch.operator "onnx.Pow"(%11618, %11619) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %11621 = torch.operator "onnx.ReduceMean"(%11620) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11622 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11623 = torch.operator "onnx.Add"(%11621, %11622) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11624 = torch.operator "onnx.Sqrt"(%11623) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %11625 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32> %11626 = torch.operator "onnx.Div"(%11625, %11624) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> 
!torch.vtensor<[?,?,?,1],f32> %11627 = torch.operator "onnx.Cast"(%11602) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11628 = torch.operator "onnx.Mul"(%11627, %11626) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %11629 = torch.operator "onnx.Cast"(%11628) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %11630 = torch.operator "onnx.Mul"(%11629, %489) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %11631 = torch.operator "onnx.Shape"(%11617) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
// %11632, %11635 and %11638 below are rank-0 si64 constants: their resource type is spelled tensor<si64> so that it agrees with the !torch.vtensor<[],si64> result type (a bare `tensor` carries no element type and is not a valid builtin tensor type).
%11632 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11633 = torch.operator "onnx.Gather"(%11631, %11632) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11634 = torch.operator "onnx.Shape"(%11617) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11635 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11636 = torch.operator "onnx.Gather"(%11634, %11635) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11637 = torch.operator "onnx.Shape"(%11617) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11638 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11639 = torch.operator "onnx.Gather"(%11637, %11638) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11640 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16182_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11641 = torch.operator "onnx.Unsqueeze"(%11633, %11640) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11642 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16184_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11643 = torch.operator "onnx.Unsqueeze"(%11636, %11642) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11644 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16186_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11645 = torch.operator "onnx.Unsqueeze"(%11639, %11644) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11646 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11647 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11648 = torch.operator "onnx.Concat"(%11641, %11643, %11645, %11646, %11647) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %11649 = torch.operator "onnx.Reshape"(%11617, %11648) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11650 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %11651:2 = torch.operator "onnx.Split"(%11649, %11650) {torch.onnx.axis = -1 : si64} : 
(!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %11652 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11653 = torch.operator "onnx.Squeeze"(%11651#0, %11652) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11654 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11655 = torch.operator "onnx.Squeeze"(%11651#1, %11654) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11656 = torch.operator "onnx.Neg"(%11655) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11657 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11658 = torch.operator "onnx.Unsqueeze"(%11656, %11657) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11659 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11660 = torch.operator "onnx.Unsqueeze"(%11653, %11659) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11661 = torch.operator "onnx.Concat"(%11658, %11660) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11662 = torch.operator "onnx.Shape"(%11661) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %11663 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.10_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11664 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11665 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11666 = torch.operator "onnx.Slice"(%11662, %11664, %11665, %11663) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11667 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11668 = torch.operator "onnx.Concat"(%11666, %11667) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11669 = torch.operator "onnx.Reshape"(%11661, %11668) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11670 = torch.operator "onnx.Cast"(%11617) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %11671 = torch.operator "onnx.Mul"(%11670, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11672 = torch.operator "onnx.Cast"(%11669) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11673 = torch.operator "onnx.Mul"(%11672, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11674 = torch.operator "onnx.Add"(%11671, %11673) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> 
!torch.vtensor<[?,?,4608,128],f32> %11675 = torch.operator "onnx.Cast"(%11674) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %11676 = torch.operator "onnx.Shape"(%11630) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
// %11677, %11680 and %11683 below are rank-0 si64 constants: their resource type is spelled tensor<si64> so that it agrees with the !torch.vtensor<[],si64> result type (a bare `tensor` carries no element type and is not a valid builtin tensor type).
%11677 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11678 = torch.operator "onnx.Gather"(%11676, %11677) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11679 = torch.operator "onnx.Shape"(%11630) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11680 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11681 = torch.operator "onnx.Gather"(%11679, %11680) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11682 = torch.operator "onnx.Shape"(%11630) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11683 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64> %11684 = torch.operator "onnx.Gather"(%11682, %11683) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11685 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16227_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11686 = torch.operator "onnx.Unsqueeze"(%11678, %11685) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11687 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16229_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%11688 = torch.operator "onnx.Unsqueeze"(%11681, %11687) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11689 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16231_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11690 = torch.operator "onnx.Unsqueeze"(%11684, %11689) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11691 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11692 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11693 = torch.operator "onnx.Concat"(%11686, %11688, %11690, %11691, %11692) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %11694 = torch.operator "onnx.Reshape"(%11630, %11693) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11695 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %11696:2 = torch.operator "onnx.Split"(%11694, %11695) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %11697 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11698 = torch.operator "onnx.Squeeze"(%11696#0, %11697) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?],bf16> %11699 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11700 = torch.operator "onnx.Squeeze"(%11696#1, %11699) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11701 = torch.operator "onnx.Neg"(%11700) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11703 = torch.operator "onnx.Unsqueeze"(%11701, %11702) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11704 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11705 = torch.operator "onnx.Unsqueeze"(%11698, %11704) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11706 = torch.operator "onnx.Concat"(%11703, %11705) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11707 = torch.operator "onnx.Shape"(%11706) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %11708 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11709 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11710 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_35_attr__value> : 
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11711 = torch.operator "onnx.Slice"(%11707, %11709, %11710, %11708) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11712 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11713 = torch.operator "onnx.Concat"(%11711, %11712) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11714 = torch.operator "onnx.Reshape"(%11706, %11713) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11715 = torch.operator "onnx.Cast"(%11630) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %11716 = torch.operator "onnx.Mul"(%11715, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11717 = torch.operator "onnx.Cast"(%11714) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11718 = torch.operator "onnx.Mul"(%11717, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11719 = torch.operator "onnx.Add"(%11716, %11718) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11720 = torch.operator "onnx.Cast"(%11719) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %11721 = torch.operator "onnx.Shape"(%11675) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %11722 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %11723 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11724 = torch.operator "onnx.Slice"(%11721, %11722, %11723) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11725 = torch.operator "onnx.Cast"(%11724) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %11726 = torch.operator "onnx.Sqrt"(%11725) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11727 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %11728 = torch.operator "onnx.Cast"(%11726) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %11729 = torch.operator "onnx.Div"(%11727, %11728) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %11730 = torch.operator "onnx.Cast"(%11729) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %11731 = torch.operator "onnx.Transpose"(%11720) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %11732 = torch.operator "onnx.Sqrt"(%11730) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11733 = torch.operator "onnx.Mul"(%11675, %11732) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %11734 = torch.operator "onnx.Sqrt"(%11730) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %11735 = torch.operator "onnx.Mul"(%11731, %11734) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %11736 = torch.operator "onnx.MatMul"(%11733, %11735) : (!torch.vtensor<[?,?,4608,128],bf16>, 
!torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %11737 = torch.operator "onnx.Softmax"(%11736) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %11738 = torch.operator "onnx.MatMul"(%11737, %11604) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %11739 = torch.operator "onnx.Transpose"(%11738) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %11740 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %11741 = torch.operator "onnx.Mul"(%11577, %11740) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11742 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16284_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11743 = torch.operator "onnx.Unsqueeze"(%11564, %11742) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11744 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11745 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16287_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11746 = torch.operator "onnx.Unsqueeze"(%11741, %11745) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11747 = torch.operator "onnx.Concat"(%11743, %11744, %11746) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11748 = torch.operator "onnx.Reshape"(%11739, %11747) {torch.onnx.allowzero = 0 : si64} : 
(!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
// ^ type signature of the Reshape (%11748) that starts on the previous source line; kept verbatim.
//
// Fix applied in this section: rank-0 constants were serialized as `... : tensor}`.
// The builtin tensor type requires an element type, so each one is restored from the
// op's declared result type (`!torch.vtensor<[],bf16>` -> `tensor<bf16>`).

// ---- single_transformer_blocks.10: concat with %11561, project, gate, residual ----
// The bf16 -> bf16 Cast is a no-op emitted by the exporter; left in place because
// later ops reference %11749.
%11749 = torch.operator "onnx.Cast"(%11748) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
%11750 = torch.operator "onnx.Concat"(%11749, %11561) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16>
%11751 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.10_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11752 = torch.operator "onnx.Unsqueeze"(%11535, %11751) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%11753 = torch.operator "onnx.MatMul"(%11750, %1023) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11754 = torch.operator "onnx.Add"(%487, %11753) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11755 = torch.operator "onnx.Mul"(%11752, %11754) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11756 = torch.operator "onnx.Add"(%11517, %11755) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// ---- single_transformer_blocks.11 / norm: Gemm on %1285, then three consecutive slices ----
// Slice bounds are multiples of ((gathered dim + Constant_2) / Constant_3); the
// multipliers live in external resources.
// NOTE(review): presumably this splits the 9216-wide Gemm output into three equal chunks — confirm.
%11757 = torch.operator "onnx.Gemm"(%1285, %493, %494) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16>
%11758 = torch.operator "onnx.Shape"(%11757) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64>
%11759 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11760 = torch.operator "onnx.Gather"(%11758, %11759) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11761 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11762 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11763 = torch.operator "onnx.Add"(%11760, %11762) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11764 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11765 = torch.operator "onnx.Div"(%11763, %11764) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11766 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11767 = torch.operator "onnx.Mul"(%11765, %11766) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11768 = torch.operator "onnx.Slice"(%11757, %11761, %11767, %11759) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%11769 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11770 = torch.operator "onnx.Mul"(%11765, %11769) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11771 = torch.operator "onnx.Slice"(%11757, %11767, %11770, %11759) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%11772 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11773 = torch.operator "onnx.Mul"(%11765, %11772) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11774 = torch.operator "onnx.Slice"(%11757, %11770, %11773, %11759) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>

// LayerNormalization over the last axis of the block-10 output, with scale/bias
// loaded from util globals; then * (second slice + Constant_8) + first slice.
%_2Fsingle_transformer_blocks.112Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.11/norm/norm/Constant_attr__value" : tensor<3072xbf16>
%11775 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.112Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Fsingle_transformer_blocks.112Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.11/norm/norm/Constant_1_attr__value" : tensor<3072xbf16>
%11776 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.112Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%11777 = torch.operator "onnx.LayerNormalization"(%11756, %11775, %11776) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11778 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11779 = torch.operator "onnx.Unsqueeze"(%11771, %11778) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%11780 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11781 = torch.operator "onnx.Add"(%11779, %11780) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%11782 = torch.operator "onnx.Mul"(%11777, %11781) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11784 = torch.operator "onnx.Unsqueeze"(%11768, %11783) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%11785 = torch.operator "onnx.Add"(%11782, %11784) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// ---- single_transformer_blocks.11: 3072 -> 12288 projection and start of act_mlp ----
// x = %11787; computes x + Constant * x^3 here, continued on the following lines.
%11786 = torch.operator "onnx.MatMul"(%11785, %1024) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11787 = torch.operator "onnx.Add"(%495, %11786) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11788 = torch.operator "onnx.Mul"(%11787, %11787) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11789 = torch.operator "onnx.Mul"(%11787, %11788) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11790 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11791 = torch.operator "onnx.Mul"(%11790, %11789) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11792 = torch.operator "onnx.Add"(%11787, %11791) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// The definition of %11793 continues on the next source line.
%11793 =
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
// ^ right-hand side of `%11793 =`, which starts on the previous source line.
//
// Fix applied in this section (including the line above): rank-0 constants were
// serialized as `... : tensor}`. The builtin tensor type requires an element type, so
// each one is restored from the op's declared result type
// (`!torch.vtensor<[],bf16>` -> `tensor<bf16>`, `!torch.vtensor<[],si64>` -> `tensor<si64>`).

// ---- single_transformer_blocks.11 / act_mlp (tail) ----
// Constant_3 * x * (Constant_2 + tanh(Constant_1 * %11792)), with x = %11787.
// NOTE(review): matches the form of the tanh-approximated GELU; the constant values
// are in external resources — confirm.
%11794 = torch.operator "onnx.Mul"(%11793, %11792) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11795 = torch.operator "onnx.Tanh"(%11794) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11796 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11797 = torch.operator "onnx.Add"(%11796, %11795) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11798 = torch.operator "onnx.Mul"(%11787, %11797) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%11799 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%11800 = torch.operator "onnx.Mul"(%11799, %11798) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>

// ---- single_transformer_blocks.11 / attn: three 3072x3072 projections of %11785 ----
// Biases %499/%500/%501 and weights %1025/%1026/%1027 are defined outside this chunk.
%11801 = torch.operator "onnx.Shape"(%11785) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%11802 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11803 = torch.operator "onnx.Gather"(%11801, %11802) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11804 = torch.operator "onnx.MatMul"(%11785, %1025) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11805 = torch.operator "onnx.Add"(%499, %11804) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11806 = torch.operator "onnx.MatMul"(%11785, %1026) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11807 = torch.operator "onnx.Add"(%500, %11806) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11808 = torch.operator "onnx.MatMul"(%11785, %1027) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%11809 = torch.operator "onnx.Add"(%501, %11808) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// %11816 = (gathered dim of shape(%11807)) / Constant_2; the two si64 -> si64 Casts
// are exporter no-ops, left in place so downstream references stay valid.
%11810 = torch.operator "onnx.Shape"(%11807) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%11811 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11812 = torch.operator "onnx.Gather"(%11810, %11811) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11813 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11814 = torch.operator "onnx.Div"(%11812, %11813) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11815 = torch.operator "onnx.Cast"(%11814) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11816 = torch.operator "onnx.Cast"(%11815) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// Three identical 4-element target shapes [%11803, const, const, %11816], one per projection.
%11817 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16359_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11818 = torch.operator "onnx.Unsqueeze"(%11803, %11817) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11819 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11820 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11821 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16363_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11822 = torch.operator "onnx.Unsqueeze"(%11816, %11821) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11823 = torch.operator "onnx.Concat"(%11818, %11819, %11820, %11822) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%11824 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16366_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11825 = torch.operator "onnx.Unsqueeze"(%11803, %11824) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11826 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11827 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11828 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16370_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11829 = torch.operator "onnx.Unsqueeze"(%11816, %11828) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11830 = torch.operator "onnx.Concat"(%11825, %11826, %11827, %11829) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%11831 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16373_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11832 = torch.operator "onnx.Unsqueeze"(%11803, %11831) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11833 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11834 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11835 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16377_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11836 = torch.operator "onnx.Unsqueeze"(%11816, %11835) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11837 = torch.operator "onnx.Concat"(%11832, %11833, %11834, %11836) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>

// Reshape each projection to rank 4 and swap axes 1 and 2.
%11838 = torch.operator "onnx.Reshape"(%11805, %11823) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%11839 = torch.operator "onnx.Transpose"(%11838) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%11840 = torch.operator "onnx.Reshape"(%11807, %11830) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// The Transpose below continues on the next source line (its type signature follows).
%11841 = torch.operator "onnx.Transpose"(%11840) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]}
: (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// ^ type signature of the Transpose (%11841) that starts on the previous source line; kept verbatim.
//
// Fix applied in this section: rank-0 constants were serialized as `... : tensor}`.
// The builtin tensor type requires an element type, so each one is restored from the
// op's declared result type (`!torch.vtensor<[],f32>` -> `tensor<f32>`,
// `!torch.vtensor<[],si64>` -> `tensor<si64>`).
%11842 = torch.operator "onnx.Reshape"(%11809, %11837) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%11843 = torch.operator "onnx.Transpose"(%11842) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>

// ---- attn / norm_q: in f32, x * (Constant_2 / sqrt(mean(x ^ Constant, last axis) + Constant_1)),
// cast to bf16, then scaled by the [128] weight %497.
// NOTE(review): this is the shape of an RMS norm if the exponent constant is 2 — confirm.
%11844 = torch.operator "onnx.Cast"(%11839) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11845 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11846 = torch.operator "onnx.Pow"(%11844, %11845) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%11847 = torch.operator "onnx.ReduceMean"(%11846) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11848 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11849 = torch.operator "onnx.Add"(%11847, %11848) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11850 = torch.operator "onnx.Sqrt"(%11849) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11851 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11852 = torch.operator "onnx.Div"(%11851, %11850) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11853 = torch.operator "onnx.Cast"(%11839) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11854 = torch.operator "onnx.Mul"(%11853, %11852) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%11855 = torch.operator "onnx.Cast"(%11854) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%11856 = torch.operator "onnx.Mul"(%11855, %497) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>

// ---- attn / norm_k: same sequence applied to %11841, scaled by %498.
%11857 = torch.operator "onnx.Cast"(%11841) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11858 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11859 = torch.operator "onnx.Pow"(%11857, %11858) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%11860 = torch.operator "onnx.ReduceMean"(%11859) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11861 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11862 = torch.operator "onnx.Add"(%11860, %11861) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11863 = torch.operator "onnx.Sqrt"(%11862) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11864 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%11865 = torch.operator "onnx.Div"(%11864, %11863) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%11866 = torch.operator "onnx.Cast"(%11841) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11867 = torch.operator "onnx.Mul"(%11866, %11865) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%11868 = torch.operator "onnx.Cast"(%11867) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%11869 = torch.operator "onnx.Mul"(%11868, %498) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>

// Gather three entries of shape(%11856) (indices live in external resources); they
// feed the 5-element reshape target assembled on the following lines.
%11870 = torch.operator "onnx.Shape"(%11856) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11871 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11872 = torch.operator "onnx.Gather"(%11870, %11871) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11873 = torch.operator "onnx.Shape"(%11856) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11874 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11875 = torch.operator "onnx.Gather"(%11873, %11874) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11876 = torch.operator "onnx.Shape"(%11856) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%11877 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11878 = torch.operator "onnx.Gather"(%11876, %11877) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11879 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16421_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The Unsqueeze below continues on the next source line (second operand and types follow).
%11880 = torch.operator "onnx.Unsqueeze"(%11872,
%11879) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// ^ remainder of the Unsqueeze defining %11880, which starts on the previous source line.

// ---- single_transformer_blocks.11 / attn: pairwise rotate of %11856 ----
// Assemble the 5-element reshape target from the three gathered dims plus two constants.
%11881 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16423_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11882 = torch.operator "onnx.Unsqueeze"(%11875, %11881) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11883 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16425_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11884 = torch.operator "onnx.Unsqueeze"(%11878, %11883) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11885 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11886 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11887 = torch.operator "onnx.Concat"(%11880, %11882, %11884, %11885, %11886) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>

// View the last axis as pairs, split the pair axis, and rebuild it as (-second, first).
%11888 = torch.operator "onnx.Reshape"(%11856, %11887) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16>
%11889 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64>
%11890:2 = torch.operator "onnx.Split"(%11888, %11889) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>)
%11891 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11892 = torch.operator "onnx.Squeeze"(%11890#0, %11891) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%11893 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11894 = torch.operator "onnx.Squeeze"(%11890#1, %11893) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%11895 = torch.operator "onnx.Neg"(%11894) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%11896 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11897 = torch.operator "onnx.Unsqueeze"(%11895, %11896) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%11898 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11899 = torch.operator "onnx.Unsqueeze"(%11892, %11898) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%11900 = torch.operator "onnx.Concat"(%11897, %11899) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16>

// Flatten back to rank 4: a 3-entry slice of shape(%11900) with one constant entry appended.
%11901 = torch.operator "onnx.Shape"(%11900) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64>
%11902 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11903 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11904 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11905 = torch.operator "onnx.Slice"(%11901, %11903, %11904, %11902) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%11906 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11907 = torch.operator "onnx.Concat"(%11905, %11906) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%11908 = torch.operator "onnx.Reshape"(%11900, %11907) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>

// In f32: %11856 * %1455 + rotated * %1456, then cast back to bf16.
// NOTE(review): presumably %1455/%1456 are rotary cos/sin tables; they are defined
// outside this chunk — confirm.
%11909 = torch.operator "onnx.Cast"(%11856) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%11910 = torch.operator "onnx.Mul"(%11909, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%11911 = torch.operator "onnx.Cast"(%11908) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%11912 = torch.operator "onnx.Mul"(%11911, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%11913 = torch.operator "onnx.Add"(%11910, %11912) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%11914 = torch.operator "onnx.Cast"(%11913) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// The op defining %11915 continues on the next source line.
%11915 = torch.operator
"onnx.Shape"(%11869) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11916 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %11917 = torch.operator "onnx.Gather"(%11915, %11916) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11918 = torch.operator "onnx.Shape"(%11869) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11919 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %11920 = torch.operator "onnx.Gather"(%11918, %11919) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11921 = torch.operator "onnx.Shape"(%11869) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %11922 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %11923 = torch.operator "onnx.Gather"(%11921, %11922) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %11924 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16466_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11925 = torch.operator "onnx.Unsqueeze"(%11917, %11924) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11926 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16468_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11927 = torch.operator "onnx.Unsqueeze"(%11920, %11926) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11928 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_16470_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11929 = torch.operator "onnx.Unsqueeze"(%11923, %11928) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %11930 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11931 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11932 = torch.operator "onnx.Concat"(%11925, %11927, %11929, %11930, %11931) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %11933 = torch.operator "onnx.Reshape"(%11869, %11932) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11934 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %11935:2 = torch.operator "onnx.Split"(%11933, %11934) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %11936 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11937 = torch.operator "onnx.Squeeze"(%11935#0, %11936) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11938 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %11939 = torch.operator "onnx.Squeeze"(%11935#1, %11938) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11940 = torch.operator "onnx.Neg"(%11939) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %11941 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11942 = torch.operator "onnx.Unsqueeze"(%11940, %11941) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11943 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11944 = torch.operator "onnx.Unsqueeze"(%11937, %11943) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %11945 = torch.operator "onnx.Concat"(%11942, %11944) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %11946 = torch.operator "onnx.Shape"(%11945) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %11947 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11948 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11950 = torch.operator "onnx.Slice"(%11946, %11948, %11949, %11947) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %11951 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11952 = torch.operator "onnx.Concat"(%11950, %11951) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %11953 = torch.operator "onnx.Reshape"(%11945, %11952) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %11954 = torch.operator "onnx.Cast"(%11869) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %11955 = torch.operator "onnx.Mul"(%11954, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11956 = torch.operator "onnx.Cast"(%11953) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %11957 = torch.operator "onnx.Mul"(%11956, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11958 = torch.operator "onnx.Add"(%11955, %11957) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %11959 = torch.operator "onnx.Cast"(%11958) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %11960 = torch.operator "onnx.Shape"(%11914) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %11961 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11962 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
// single_transformer_blocks.11 attn (name per the dense_resource constants below):
// attention product over %11914 / %11959 / %11843, then collapse to a rank-3 tensor.
// Operands %11960, %11961, %11962, %11914, %11959, %11843, %11816 and %11803 are
// defined earlier in the function.
// NOTE(review): %11914 / %11959 / %11843 presumably play the query / key / value
// roles -- confirm against the exporting model.

// Scale: slice a single entry from %11960 (= onnx.Shape of %11914), cast it to bf16
// and take its square root; divide constant %11966 by that root in f32 and cast the
// quotient back to bf16.
// NOTE(review): presumably the sliced entry is the trailing 128 and %11966 holds 1.0,
// giving 1/sqrt(128); both live in dense_resource blobs not visible here -- confirm.
%11963 = torch.operator "onnx.Slice"(%11960, %11961, %11962) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11964 = torch.operator "onnx.Cast"(%11963) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16>
%11965 = torch.operator "onnx.Sqrt"(%11964) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%11966 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
%11967 = torch.operator "onnx.Cast"(%11965) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32>
%11968 = torch.operator "onnx.Div"(%11966, %11967) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
%11969 = torch.operator "onnx.Cast"(%11968) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>

// Swap the last two axes of %11959 so it can be the right-hand MatMul operand.
%11970 = torch.operator "onnx.Transpose"(%11959) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>

// The scale is not applied to the product: each MatMul operand is multiplied by
// sqrt(%11969) instead. %11973 recomputes the same value as %11971.
%11971 = torch.operator "onnx.Sqrt"(%11969) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%11972 = torch.operator "onnx.Mul"(%11914, %11971) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%11973 = torch.operator "onnx.Sqrt"(%11969) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%11974 = torch.operator "onnx.Mul"(%11970, %11973) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>

// [?,?,4608,4608] scores, softmax over the last axis, then weight %11843.
%11975 = torch.operator "onnx.MatMul"(%11972, %11974) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%11976 = torch.operator "onnx.Softmax"(%11975) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%11977 = torch.operator "onnx.MatMul"(%11976, %11843) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>

// Move the 4608 axis to position 1 ([?,?,4608,?] -> [?,4608,?,?]).
%11978 = torch.operator "onnx.Transpose"(%11977) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>

// Build the 3-element target shape [%11803, %11983, %11816 * %11979] and reshape.
// The rank-0 constant's attribute type is spelled tensor<si64>, matching its
// !torch.vtensor<[],si64> result; a bare `tensor` is not a valid tensor type.
// NOTE(review): %11983 is presumably -1 (inferred dim) and the product the merged
// per-head width -- the values are in resource blobs, confirm.
%11979 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%11980 = torch.operator "onnx.Mul"(%11816, %11979) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%11981 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16523_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11982 = torch.operator "onnx.Unsqueeze"(%11803, %11981) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11983 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11984 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16526_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%11985 = torch.operator "onnx.Unsqueeze"(%11980, %11984) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%11986 = torch.operator "onnx.Concat"(%11982, %11983, %11985) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%11987 = torch.operator "onnx.Reshape"(%11978, %11986) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>

// Cast to ONNX type 16; operand and result are both bf16 per the signature, so the
// element type does not change. Kept because %11988 is consumed by the next op (%11989).
%11988 = torch.operator "onnx.Cast"(%11987) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
// Ops %11989-%11999, plus the opening of %12000 (its attribute continues on the next line).
//
// %11989          concat %11988 with %11800 ([?,4608,12288]) along axis 2.
// %11992, %11993  MatMul with %1028 ([15360,3072]) then add %496 ([3072]) -> [?,4608,3072].
// %11990, %11991  unsqueeze %11774 from [?,?] to [?,1,?].
// %11994          multiply %11993 by the unsqueezed %11991 (result stays [?,4608,3072]).
// %11995          add %11756 ([?,4608,3072]).
// NOTE(review): this reads as attention+MLP concat -> output projection -> gate ->
// residual add closing single_transformer_blocks.11 (the constant here is named
// __single_transformer_blocks.11_Constant) -- confirm operand roles.
//
// From %11998 on the constants are named __single_transformer_blocks.12_norm_*:
// %11996          Gemm(%1285 [1,3072], %502 [9216,3072], %503 [9216]) with transB = 1,
//                 alpha = beta = 1 -> [1,9216]; later ops slice it (%12007, %12010, %12013).
// %11997-%11999   onnx.Shape of the Gemm result and a Gather (axis 0) of one entry
//                 selected by constant %11998.
%11989 = torch.operator "onnx.Concat"(%11988, %11800) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %11990 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.11_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11991 = torch.operator "onnx.Unsqueeze"(%11774, %11990) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %11992 = torch.operator "onnx.MatMul"(%11989, %1028) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11993 = torch.operator "onnx.Add"(%496, %11992) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11994 = torch.operator "onnx.Mul"(%11991, %11993) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11995 = torch.operator "onnx.Add"(%11756, %11994) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %11996 = torch.operator "onnx.Gemm"(%1285, %502, %503) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %11997 = torch.operator "onnx.Shape"(%11996) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %11998 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %11999 = torch.operator "onnx.Gather"(%11997, %11998) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12000 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.12_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12001 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12002 = torch.operator "onnx.Add"(%11999, %12001) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12003 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12004 = torch.operator "onnx.Div"(%12002, %12003) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12005 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12006 = torch.operator "onnx.Mul"(%12004, %12005) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12007 = torch.operator "onnx.Slice"(%11996, %12000, %12006, %11998) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %12008 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12009 = torch.operator "onnx.Mul"(%12004, %12008) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12010 = torch.operator "onnx.Slice"(%11996, %12006, %12009, %11998) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %12011 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %12012 = torch.operator "onnx.Mul"(%12004, %12011) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12013 = torch.operator "onnx.Slice"(%11996, %12009, %12012, %11998) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.122Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.12/norm/norm/Constant_attr__value" : tensor<3072xbf16> %12014 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.122Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.122Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.12/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %12015 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.122Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %12016 = torch.operator "onnx.LayerNormalization"(%11995, %12014, %12015) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12017 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12018 = torch.operator "onnx.Unsqueeze"(%12010, %12017) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12019 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12020 = torch.operator "onnx.Add"(%12018, %12019) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %12021 = torch.operator 
"onnx.Mul"(%12016, %12020) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12022 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12023 = torch.operator "onnx.Unsqueeze"(%12007, %12022) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12024 = torch.operator "onnx.Add"(%12021, %12023) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12025 = torch.operator "onnx.MatMul"(%12024, %1029) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12026 = torch.operator "onnx.Add"(%504, %12025) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12027 = torch.operator "onnx.Mul"(%12026, %12026) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12028 = torch.operator "onnx.Mul"(%12026, %12027) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12029 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12030 = torch.operator "onnx.Mul"(%12029, %12028) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12031 = torch.operator "onnx.Add"(%12026, %12030) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12032 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12033 = torch.operator 
"onnx.Mul"(%12032, %12031) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12034 = torch.operator "onnx.Tanh"(%12033) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12035 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12036 = torch.operator "onnx.Add"(%12035, %12034) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12037 = torch.operator "onnx.Mul"(%12026, %12036) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12038 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12039 = torch.operator "onnx.Mul"(%12038, %12037) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12040 = torch.operator "onnx.Shape"(%12024) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %12041 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12042 = torch.operator "onnx.Gather"(%12040, %12041) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12043 = torch.operator "onnx.MatMul"(%12024, %1030) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12044 = torch.operator "onnx.Add"(%508, %12043) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12045 = torch.operator "onnx.MatMul"(%12024, %1031) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %12046 = torch.operator "onnx.Add"(%509, %12045) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12047 = torch.operator "onnx.MatMul"(%12024, %1032) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12048 = torch.operator "onnx.Add"(%510, %12047) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12049 = torch.operator "onnx.Shape"(%12046) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %12050 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12051 = torch.operator "onnx.Gather"(%12049, %12050) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12052 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12053 = torch.operator "onnx.Div"(%12051, %12052) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12054 = torch.operator "onnx.Cast"(%12053) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12055 = torch.operator "onnx.Cast"(%12054) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12056 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16598_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12057 = torch.operator "onnx.Unsqueeze"(%12042, %12056) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12058 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %12059 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12060 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16602_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12061 = torch.operator "onnx.Unsqueeze"(%12055, %12060) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12062 = torch.operator "onnx.Concat"(%12057, %12058, %12059, %12061) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12063 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16605_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12064 = torch.operator "onnx.Unsqueeze"(%12042, %12063) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12065 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12066 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12067 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16609_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12068 = torch.operator "onnx.Unsqueeze"(%12055, %12067) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12069 = torch.operator "onnx.Concat"(%12064, %12065, %12066, %12068) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12070 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<_Constant_16612_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12071 = torch.operator "onnx.Unsqueeze"(%12042, %12070) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12072 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12073 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12074 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16616_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12075 = torch.operator "onnx.Unsqueeze"(%12055, %12074) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12076 = torch.operator "onnx.Concat"(%12071, %12072, %12073, %12075) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12077 = torch.operator "onnx.Reshape"(%12044, %12062) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12078 = torch.operator "onnx.Transpose"(%12077) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12079 = torch.operator "onnx.Reshape"(%12046, %12069) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12080 = torch.operator "onnx.Transpose"(%12079) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12081 = torch.operator "onnx.Reshape"(%12048, %12076) {torch.onnx.allowzero = 0 : si64} : 
(!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12082 = torch.operator "onnx.Transpose"(%12081) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12083 = torch.operator "onnx.Cast"(%12078) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12084 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12085 = torch.operator "onnx.Pow"(%12083, %12084) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %12086 = torch.operator "onnx.ReduceMean"(%12085) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12087 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12088 = torch.operator "onnx.Add"(%12086, %12087) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12089 = torch.operator "onnx.Sqrt"(%12088) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12090 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12091 = torch.operator "onnx.Div"(%12090, %12089) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12092 = torch.operator "onnx.Cast"(%12078) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12093 = torch.operator "onnx.Mul"(%12092, %12091) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %12094 = torch.operator 
"onnx.Cast"(%12093) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %12095 = torch.operator "onnx.Mul"(%12094, %506) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %12096 = torch.operator "onnx.Cast"(%12080) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12097 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12098 = torch.operator "onnx.Pow"(%12096, %12097) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %12099 = torch.operator "onnx.ReduceMean"(%12098) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12100 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12101 = torch.operator "onnx.Add"(%12099, %12100) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12102 = torch.operator "onnx.Sqrt"(%12101) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12103 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12104 = torch.operator "onnx.Div"(%12103, %12102) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12105 = torch.operator "onnx.Cast"(%12080) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12106 = torch.operator "onnx.Mul"(%12105, %12104) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %12107 = torch.operator 
"onnx.Cast"(%12106) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %12108 = torch.operator "onnx.Mul"(%12107, %507) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %12109 = torch.operator "onnx.Shape"(%12095) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12110 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12111 = torch.operator "onnx.Gather"(%12109, %12110) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12112 = torch.operator "onnx.Shape"(%12095) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12113 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12114 = torch.operator "onnx.Gather"(%12112, %12113) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12115 = torch.operator "onnx.Shape"(%12095) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12116 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12117 = torch.operator "onnx.Gather"(%12115, %12116) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12118 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16660_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12119 = torch.operator "onnx.Unsqueeze"(%12111, %12118) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12120 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_16662_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12121 = torch.operator "onnx.Unsqueeze"(%12114, %12120) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12122 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16664_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12123 = torch.operator "onnx.Unsqueeze"(%12117, %12122) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12124 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12125 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12126 = torch.operator "onnx.Concat"(%12119, %12121, %12123, %12124, %12125) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %12127 = torch.operator "onnx.Reshape"(%12095, %12126) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12128 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %12129:2 = torch.operator "onnx.Split"(%12127, %12128) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %12130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12131 = torch.operator 
"onnx.Squeeze"(%12129#0, %12130) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12132 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12133 = torch.operator "onnx.Squeeze"(%12129#1, %12132) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12134 = torch.operator "onnx.Neg"(%12133) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12135 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12136 = torch.operator "onnx.Unsqueeze"(%12134, %12135) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12137 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12138 = torch.operator "onnx.Unsqueeze"(%12131, %12137) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12139 = torch.operator "onnx.Concat"(%12136, %12138) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12140 = torch.operator "onnx.Shape"(%12139) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %12141 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12142 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12143 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12144 = torch.operator "onnx.Slice"(%12140, %12142, %12143, %12141) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12145 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12146 = torch.operator "onnx.Concat"(%12144, %12145) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12147 = torch.operator "onnx.Reshape"(%12139, %12146) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12148 = torch.operator "onnx.Cast"(%12095) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %12149 = torch.operator "onnx.Mul"(%12148, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12150 = torch.operator "onnx.Cast"(%12147) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12151 = torch.operator "onnx.Mul"(%12150, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12152 = torch.operator "onnx.Add"(%12149, %12151) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12153 = torch.operator "onnx.Cast"(%12152) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %12154 = torch.operator "onnx.Shape"(%12108) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12155 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.12_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12156 = torch.operator "onnx.Gather"(%12154, %12155) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12157 = torch.operator "onnx.Shape"(%12108) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12158 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12159 = torch.operator "onnx.Gather"(%12157, %12158) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12160 = torch.operator "onnx.Shape"(%12108) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12161 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12162 = torch.operator "onnx.Gather"(%12160, %12161) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12163 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16705_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12164 = torch.operator "onnx.Unsqueeze"(%12156, %12163) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12165 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16707_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12166 = torch.operator "onnx.Unsqueeze"(%12159, %12165) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12167 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16709_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12168 = torch.operator "onnx.Unsqueeze"(%12162, %12167) : 
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12169 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12170 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12171 = torch.operator "onnx.Concat"(%12164, %12166, %12168, %12169, %12170) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %12172 = torch.operator "onnx.Reshape"(%12108, %12171) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12173 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %12174:2 = torch.operator "onnx.Split"(%12172, %12173) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %12175 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12176 = torch.operator "onnx.Squeeze"(%12174#0, %12175) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12177 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12178 = torch.operator "onnx.Squeeze"(%12174#1, %12177) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?],bf16> %12179 = torch.operator "onnx.Neg"(%12178) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12180 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12181 = torch.operator "onnx.Unsqueeze"(%12179, %12180) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12182 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12183 = torch.operator "onnx.Unsqueeze"(%12176, %12182) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12184 = torch.operator "onnx.Concat"(%12181, %12183) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12185 = torch.operator "onnx.Shape"(%12184) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %12186 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12187 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12188 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12189 = torch.operator "onnx.Slice"(%12185, %12187, %12188, %12186) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12190 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.12_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12191 = torch.operator "onnx.Concat"(%12189, %12190) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12192 = torch.operator "onnx.Reshape"(%12184, %12191) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12193 = torch.operator "onnx.Cast"(%12108) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %12194 = torch.operator "onnx.Mul"(%12193, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12195 = torch.operator "onnx.Cast"(%12192) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12196 = torch.operator "onnx.Mul"(%12195, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12197 = torch.operator "onnx.Add"(%12194, %12196) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12198 = torch.operator "onnx.Cast"(%12197) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %12199 = torch.operator "onnx.Shape"(%12153) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %12200 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12201 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12202 = torch.operator "onnx.Slice"(%12199, %12200, %12201) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12203 = torch.operator "onnx.Cast"(%12202) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %12204 = torch.operator "onnx.Sqrt"(%12203) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12205 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %12206 = torch.operator "onnx.Cast"(%12204) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %12207 = torch.operator "onnx.Div"(%12205, %12206) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %12208 = torch.operator "onnx.Cast"(%12207) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %12209 = torch.operator "onnx.Transpose"(%12198) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %12210 = torch.operator "onnx.Sqrt"(%12208) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12211 = torch.operator "onnx.Mul"(%12153, %12210) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %12212 = torch.operator "onnx.Sqrt"(%12208) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12213 = torch.operator "onnx.Mul"(%12209, %12212) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %12214 = torch.operator "onnx.MatMul"(%12211, %12213) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %12215 = torch.operator "onnx.Softmax"(%12214) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %12216 = torch.operator "onnx.MatMul"(%12215, %12082) : (!torch.vtensor<[?,?,4608,4608],bf16>, 
!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %12217 = torch.operator "onnx.Transpose"(%12216) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %12218 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12219 = torch.operator "onnx.Mul"(%12055, %12218) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12220 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16762_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12221 = torch.operator "onnx.Unsqueeze"(%12042, %12220) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12222 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12223 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16765_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12224 = torch.operator "onnx.Unsqueeze"(%12219, %12223) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12225 = torch.operator "onnx.Concat"(%12221, %12222, %12224) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12226 = torch.operator "onnx.Reshape"(%12217, %12225) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %12227 = torch.operator "onnx.Cast"(%12226) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %12228 = torch.operator "onnx.Concat"(%12227, %12039) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, 
!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %12229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.12_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12230 = torch.operator "onnx.Unsqueeze"(%12013, %12229) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12231 = torch.operator "onnx.MatMul"(%12228, %1033) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12232 = torch.operator "onnx.Add"(%505, %12231) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12233 = torch.operator "onnx.Mul"(%12230, %12232) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12234 = torch.operator "onnx.Add"(%11995, %12233) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12235 = torch.operator "onnx.Gemm"(%1285, %511, %512) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %12236 = torch.operator "onnx.Shape"(%12235) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %12237 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12238 = torch.operator "onnx.Gather"(%12236, %12237) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12239 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12240 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12241 = torch.operator "onnx.Add"(%12238, %12240) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12242 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12243 = torch.operator "onnx.Div"(%12241, %12242) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12244 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12245 = torch.operator "onnx.Mul"(%12243, %12244) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12246 = torch.operator "onnx.Slice"(%12235, %12239, %12245, %12237) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %12247 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12248 = torch.operator "onnx.Mul"(%12243, %12247) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12249 = torch.operator "onnx.Slice"(%12235, %12245, %12248, %12237) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %12250 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12251 = torch.operator "onnx.Mul"(%12243, %12250) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[1],si64> %12252 = torch.operator "onnx.Slice"(%12235, %12248, %12251, %12237) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.132Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.13/norm/norm/Constant_attr__value" : tensor<3072xbf16> %12253 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.132Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.132Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.13/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %12254 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.132Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %12255 = torch.operator "onnx.LayerNormalization"(%12234, %12253, %12254) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12256 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12257 = torch.operator "onnx.Unsqueeze"(%12249, %12256) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12258 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12259 = torch.operator "onnx.Add"(%12257, %12258) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %12260 = torch.operator "onnx.Mul"(%12255, %12259) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12261 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12262 = torch.operator "onnx.Unsqueeze"(%12246, %12261) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12263 = torch.operator "onnx.Add"(%12260, %12262) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12264 = torch.operator "onnx.MatMul"(%12263, %1034) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12265 = torch.operator "onnx.Add"(%513, %12264) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12266 = torch.operator "onnx.Mul"(%12265, %12265) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12267 = torch.operator "onnx.Mul"(%12265, %12266) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12268 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12269 = torch.operator "onnx.Mul"(%12268, %12267) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12270 = torch.operator "onnx.Add"(%12265, %12269) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12271 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12272 = torch.operator "onnx.Mul"(%12271, %12270) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12273 = 
torch.operator "onnx.Tanh"(%12272) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12274 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12275 = torch.operator "onnx.Add"(%12274, %12273) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12276 = torch.operator "onnx.Mul"(%12265, %12275) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12277 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12278 = torch.operator "onnx.Mul"(%12277, %12276) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12279 = torch.operator "onnx.Shape"(%12263) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %12280 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12281 = torch.operator "onnx.Gather"(%12279, %12280) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12282 = torch.operator "onnx.MatMul"(%12263, %1035) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12283 = torch.operator "onnx.Add"(%517, %12282) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12284 = torch.operator "onnx.MatMul"(%12263, %1036) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12285 = torch.operator "onnx.Add"(%518, %12284) : (!torch.vtensor<[3072],bf16>, 
!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12286 = torch.operator "onnx.MatMul"(%12263, %1037) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12287 = torch.operator "onnx.Add"(%519, %12286) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12288 = torch.operator "onnx.Shape"(%12285) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %12289 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12290 = torch.operator "onnx.Gather"(%12288, %12289) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12291 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12292 = torch.operator "onnx.Div"(%12290, %12291) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12293 = torch.operator "onnx.Cast"(%12292) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12294 = torch.operator "onnx.Cast"(%12293) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12295 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16837_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12296 = torch.operator "onnx.Unsqueeze"(%12281, %12295) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12297 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12298 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.13_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ---- attn: build three 4-element shape vectors and reshape/transposes ----
// Each shape vector is Concat(unsqueeze(%12281), const, const, unsqueeze(%12294)).
// Shape vector #1 (%12301), applied to %12283 below.
%12299 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16841_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12300 = torch.operator "onnx.Unsqueeze"(%12294, %12299) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12301 = torch.operator "onnx.Concat"(%12296, %12297, %12298, %12300) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Shape vector #2 (%12308), applied to %12285 below.
%12302 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16844_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12303 = torch.operator "onnx.Unsqueeze"(%12281, %12302) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12304 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12305 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12306 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16848_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12307 = torch.operator "onnx.Unsqueeze"(%12294, %12306) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12308 = torch.operator "onnx.Concat"(%12303, %12304, %12305, %12307) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Shape vector #3 (%12315), applied to %12287 below.
%12309 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16851_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12310 = torch.operator "onnx.Unsqueeze"(%12281, %12309) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12311 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12312 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12313 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16855_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12314 = torch.operator "onnx.Unsqueeze"(%12294, %12313) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12315 = torch.operator "onnx.Concat"(%12310, %12311, %12312, %12314) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Reshape each projection to rank 4, then swap dims 1 and 2 (perm 0,2,1,3).
%12316 = torch.operator "onnx.Reshape"(%12283, %12301) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%12317 = torch.operator "onnx.Transpose"(%12316) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%12318 = torch.operator "onnx.Reshape"(%12285, %12308) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%12319 = torch.operator "onnx.Transpose"(%12318) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%12320 = torch.operator "onnx.Reshape"(%12287, %12315) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%12321
= torch.operator "onnx.Transpose"(%12320) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// ---- attn.norm_q: normalise %12317 over its last axis in f32, scale by weight %515 ----
// Sequence: cast to f32 -> Pow(x, c0) -> mean over axis -1 (keepdims) -> + c1 -> Sqrt
// -> c2 / sqrt -> multiply back onto x -> cast to bf16 -> multiply by the [128] weight.
// NOTE(review): c0/c1/c2 are dense resources not visible here; the pattern looks like
// RMS normalisation (exponent 2, epsilon, numerator 1) -- confirm.
// Rank-0 f32 constants use tensor<f32>, matching their !torch.vtensor<[],f32> results.
%12322 = torch.operator "onnx.Cast"(%12317) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12323 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12324 = torch.operator "onnx.Pow"(%12322, %12323) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%12325 = torch.operator "onnx.ReduceMean"(%12324) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12326 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12327 = torch.operator "onnx.Add"(%12325, %12326) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12328 = torch.operator "onnx.Sqrt"(%12327) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12329 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12330 = torch.operator "onnx.Div"(%12329, %12328) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Second f32 cast of the same input (%12317), kept exactly as exported.
%12331 = torch.operator "onnx.Cast"(%12317) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12332 = torch.operator "onnx.Mul"(%12331, %12330) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%12333 = torch.operator "onnx.Cast"(%12332) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%12334 = torch.operator "onnx.Mul"(%12333, %515) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// ---- attn.norm_k: the same sequence applied to %12319, scaled by weight %516 ----
%12335 = torch.operator "onnx.Cast"(%12319) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12336 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12337 = torch.operator "onnx.Pow"(%12335, %12336) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%12338 = torch.operator "onnx.ReduceMean"(%12337) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12339 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12340 = torch.operator "onnx.Add"(%12338, %12339) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12341 = torch.operator "onnx.Sqrt"(%12340) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12342 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12343 = torch.operator "onnx.Div"(%12342, %12341) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12344 = torch.operator "onnx.Cast"(%12319) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12345 = torch.operator "onnx.Mul"(%12344, %12343) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%12346 = torch.operator "onnx.Cast"(%12345) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) ->
!torch.vtensor<[?,?,?,?],bf16>
%12347 = torch.operator "onnx.Mul"(%12346, %516) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// ---- attn: pairwise rotation of %12334 (output of the norm_q chain) ----
// 1) Gather three scalar dims of %12334 and build a 5-element shape whose last two
//    entries come from constants; the Reshape result type fixes the last dim to 2.
// NOTE(review): gather indices and the constant shape entries are dense resources
// not visible here.
// Rank-0 index constants use tensor<si64>, matching their !torch.vtensor<[],si64> results.
%12348 = torch.operator "onnx.Shape"(%12334) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12349 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12350 = torch.operator "onnx.Gather"(%12348, %12349) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12351 = torch.operator "onnx.Shape"(%12334) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12352 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12353 = torch.operator "onnx.Gather"(%12351, %12352) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12354 = torch.operator "onnx.Shape"(%12334) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12355 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12356 = torch.operator "onnx.Gather"(%12354, %12355) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12357 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16899_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12358 = torch.operator "onnx.Unsqueeze"(%12350, %12357) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12359 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16901_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12360 = torch.operator "onnx.Unsqueeze"(%12353, %12359) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12361 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16903_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12362 = torch.operator "onnx.Unsqueeze"(%12356, %12361) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12363 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12364 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12365 = torch.operator "onnx.Concat"(%12358, %12360, %12362, %12363, %12364) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>
%12366 = torch.operator "onnx.Reshape"(%12334, %12365) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16>
// 2) Split the trailing pair into its two elements and squeeze each.
%12367 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64>
%12368:2 = torch.operator "onnx.Split"(%12366, %12367) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>)
%12369 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12370 = torch.operator "onnx.Squeeze"(%12368#0, %12369) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%12371 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12372 = torch.operator "onnx.Squeeze"(%12368#1, %12371) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// 3) Re-pair as (-second, first) along a new trailing axis.
%12373 = torch.operator "onnx.Neg"(%12372) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%12374 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12375 = torch.operator "onnx.Unsqueeze"(%12373, %12374) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%12376 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12377 = torch.operator "onnx.Unsqueeze"(%12370, %12376) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%12378 = torch.operator "onnx.Concat"(%12375, %12377) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16>
// 4) Collapse back to rank 4: three entries sliced from the rank-5 shape plus a constant.
%12379 = torch.operator "onnx.Shape"(%12378) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64>
%12380 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12381 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12382 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12383 = torch.operator "onnx.Slice"(%12379, %12381, %12382, %12380) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%12384 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12385 = torch.operator "onnx.Concat"(%12383, %12384) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%12386 = torch.operator "onnx.Reshape"(%12378, %12385) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// 5) In f32: %12334 * %1455 + rotated * %1456, then back to bf16.
// NOTE(review): %1455/%1456 are [1,1,4608,128] f32 tables defined earlier,
// presumably the rotary cos/sin tables -- confirm.
%12387 = torch.operator "onnx.Cast"(%12334) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%12388 = torch.operator "onnx.Mul"(%12387, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%12389 = torch.operator "onnx.Cast"(%12386) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12390 = torch.operator "onnx.Mul"(%12389, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%12391 = torch.operator "onnx.Add"(%12388, %12390) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%12392 = torch.operator "onnx.Cast"(%12391) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// ---- attn: the same rotation starts for %12347 (output of the norm_k chain) ----
%12393 = torch.operator "onnx.Shape"(%12347) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12394 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
// ---- attn: pairwise rotation of %12347 (output of the norm_k chain) ----
// Same op sequence as the rotation applied to %12334: gather three dims, reshape so
// the last dim is 2, split, re-pair as (-second, first), reshape back to rank 4, then
// combine with the %1455/%1456 tables in f32.
// Rank-0 index constants use tensor<si64>, matching their !torch.vtensor<[],si64> results.
%12395 = torch.operator "onnx.Gather"(%12393, %12394) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12396 = torch.operator "onnx.Shape"(%12347) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12397 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12398 = torch.operator "onnx.Gather"(%12396, %12397) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12399 = torch.operator "onnx.Shape"(%12347) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12400 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12401 = torch.operator "onnx.Gather"(%12399, %12400) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12402 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16944_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12403 = torch.operator "onnx.Unsqueeze"(%12395, %12402) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12404 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16946_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12405 = torch.operator "onnx.Unsqueeze"(%12398, %12404) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12406 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_16948_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12407 = torch.operator "onnx.Unsqueeze"(%12401, %12406) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12408 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12410 = torch.operator "onnx.Concat"(%12403, %12405, %12407, %12408, %12409) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>
%12411 = torch.operator "onnx.Reshape"(%12347, %12410) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16>
// Split the trailing pair and squeeze each element.
%12412 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64>
%12413:2 = torch.operator "onnx.Split"(%12411, %12412) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>)
%12414 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12415 = torch.operator "onnx.Squeeze"(%12413#0, %12414) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%12416 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12417 = torch.operator "onnx.Squeeze"(%12413#1, %12416) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// Re-pair as (-second, first) along a new trailing axis.
%12418 = torch.operator "onnx.Neg"(%12417) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%12419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12420 = torch.operator "onnx.Unsqueeze"(%12418, %12419) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%12421 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12422 = torch.operator "onnx.Unsqueeze"(%12415, %12421) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%12423 = torch.operator "onnx.Concat"(%12420, %12422) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16>
// Collapse back to rank 4.
%12424 = torch.operator "onnx.Shape"(%12423) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64>
%12425 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12426 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12427 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12428 = torch.operator "onnx.Slice"(%12424, %12426, %12427, %12425) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%12429 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12430 = torch.operator "onnx.Concat"(%12428, %12429) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%12431 = torch.operator "onnx.Reshape"(%12423, %12430) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// In f32: %12347 * %1455 + rotated * %1456, then back to bf16.
%12432 = torch.operator "onnx.Cast"(%12347) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%12433 = torch.operator "onnx.Mul"(%12432, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%12434 = torch.operator "onnx.Cast"(%12431) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12435 = torch.operator "onnx.Mul"(%12434, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%12436 = torch.operator "onnx.Add"(%12433, %12435) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%12437 = torch.operator "onnx.Cast"(%12436) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// ---- attn: start of the scale computation: slice one entry of the shape of %12392 ----
%12438 = torch.operator "onnx.Shape"(%12392) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64>
%12439 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12440 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12441 = torch.operator "onnx.Slice"(%12438, %12439, %12440) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12442 = torch.operator "onnx.Cast"(%12441) {torch.onnx.to = 16 : si64} :
(!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16>
// ---- attn: scale = const / sqrt(dim); sqrt(scale) is applied to both MatMul operands ----
// NOTE(review): the numerator %12444 is a dense resource not visible here (presumably 1.0).
%12443 = torch.operator "onnx.Sqrt"(%12442) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%12444 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
%12445 = torch.operator "onnx.Cast"(%12443) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32>
%12446 = torch.operator "onnx.Div"(%12444, %12445) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
%12447 = torch.operator "onnx.Cast"(%12446) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>
// Swap the last two dims of %12437 so it can be the right-hand MatMul operand.
%12448 = torch.operator "onnx.Transpose"(%12437) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// %12449 and %12451 are the same Sqrt(%12447), emitted twice by the exporter.
%12449 = torch.operator "onnx.Sqrt"(%12447) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%12450 = torch.operator "onnx.Mul"(%12392, %12449) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%12451 = torch.operator "onnx.Sqrt"(%12447) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%12452 = torch.operator "onnx.Mul"(%12448, %12451) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Scores -> softmax over the last axis -> weighted sum with %12321.
%12453 = torch.operator "onnx.MatMul"(%12450, %12452) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%12454 = torch.operator "onnx.Softmax"(%12453) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%12455 = torch.operator "onnx.MatMul"(%12454, %12321) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
// ---- attn: swap dims 1 and 2 back and flatten to rank 3 ----
%12456 = torch.operator "onnx.Transpose"(%12455) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// The rank-0 constant uses tensor<si64>, matching its !torch.vtensor<[],si64> result.
%12457 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12458 = torch.operator "onnx.Mul"(%12294, %12457) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12459 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17001_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12460 = torch.operator "onnx.Unsqueeze"(%12281, %12459) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12461 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12462 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17004_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12463 = torch.operator "onnx.Unsqueeze"(%12458, %12462) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12464 = torch.operator "onnx.Concat"(%12460, %12461, %12463) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%12465 = torch.operator "onnx.Reshape"(%12456, %12464) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16>
// bf16 -> bf16 cast, kept as exported.
%12466 = torch.operator "onnx.Cast"(%12465) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16>
// ---- block 13 output: concat attention and MLP branches on axis 2, project, gate, add ----
%12467 = torch.operator "onnx.Concat"(%12466, %12278) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16>
%12468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.13_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12469 = torch.operator "onnx.Unsqueeze"(%12252, %12468) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%12470 = torch.operator "onnx.MatMul"(%12467, %1038) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12471 = torch.operator "onnx.Add"(%514, %12470) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12472 = torch.operator "onnx.Mul"(%12469, %12471) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12473 = torch.operator "onnx.Add"(%12234, %12472) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// ---- single_transformer_blocks.14.norm: Gemm (transB = 1) of %1285 to a [1,9216] vector ----
%12474 = torch.operator "onnx.Gemm"(%1285, %520, %521) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16>
%12475 = torch.operator "onnx.Shape"(%12474) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64>
%12476 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12477 = torch.operator "onnx.Gather"(%12475, %12476) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12478 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12479 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_2_attr__value> : tensor<1xsi64>}
: () -> !torch.vtensor<[1],si64>
// ---- single_transformer_blocks.14.norm: cut the Gemm output %12474 into three slices ----
// chunk = (dim + c2) / c3; slice bounds are [c1, chunk*c4), [chunk*c4, chunk*c5) and
// [chunk*c5, chunk*c6), all along the axis given by %12476.
// NOTE(review): the c* values are dense resources not visible here.
%12480 = torch.operator "onnx.Add"(%12477, %12479) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12481 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12482 = torch.operator "onnx.Div"(%12480, %12481) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12483 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12484 = torch.operator "onnx.Mul"(%12482, %12483) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12485 = torch.operator "onnx.Slice"(%12474, %12478, %12484, %12476) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%12486 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12487 = torch.operator "onnx.Mul"(%12482, %12486) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12488 = torch.operator "onnx.Slice"(%12474, %12484, %12487, %12476) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%12489 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12490 = torch.operator "onnx.Mul"(%12482, %12489) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12491 = torch.operator "onnx.Slice"(%12474, %12487, %12490, %12476) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// ---- LayerNormalization of the block-13 output %12473 ----
// Scale and bias are [3072] bf16 globals loaded and converted to torch value tensors.
%_2Fsingle_transformer_blocks.142Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.14/norm/norm/Constant_attr__value" : tensor<3072xbf16>
%12492 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.142Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Fsingle_transformer_blocks.142Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.14/norm/norm/Constant_1_attr__value" : tensor<3072xbf16>
%12493 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.142Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%12494 = torch.operator "onnx.LayerNormalization"(%12473, %12492, %12493) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// ---- Modulation: normalised * (second slice + scalar constant) ----
// The rank-0 bf16 constant uses tensor<bf16>, matching its !torch.vtensor<[],bf16> result.
%12495 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12496 = torch.operator "onnx.Unsqueeze"(%12488, %12495) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%12497 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%12498 = torch.operator "onnx.Add"(%12496, %12497) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%12499 = torch.operator "onnx.Mul"(%12494, %12498) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_norm_Constant_9_attr__value> :
tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12501 = torch.operator "onnx.Unsqueeze"(%12485, %12500) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12502 = torch.operator "onnx.Add"(%12499, %12501) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12503 = torch.operator "onnx.MatMul"(%12502, %1039) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12504 = torch.operator "onnx.Add"(%522, %12503) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12505 = torch.operator "onnx.Mul"(%12504, %12504) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12506 = torch.operator "onnx.Mul"(%12504, %12505) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12507 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12508 = torch.operator "onnx.Mul"(%12507, %12506) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12509 = torch.operator "onnx.Add"(%12504, %12508) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12510 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12511 = torch.operator "onnx.Mul"(%12510, %12509) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12512 = torch.operator "onnx.Tanh"(%12511) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12513 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12514 = torch.operator "onnx.Add"(%12513, %12512) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12515 = torch.operator "onnx.Mul"(%12504, %12514) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12516 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12517 = torch.operator "onnx.Mul"(%12516, %12515) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12518 = torch.operator "onnx.Shape"(%12502) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %12519 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12520 = torch.operator "onnx.Gather"(%12518, %12519) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12521 = torch.operator "onnx.MatMul"(%12502, %1040) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12522 = torch.operator "onnx.Add"(%526, %12521) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12523 = torch.operator "onnx.MatMul"(%12502, %1041) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12524 = torch.operator "onnx.Add"(%527, %12523) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12525 = torch.operator "onnx.MatMul"(%12502, %1042) : (!torch.vtensor<[?,4608,3072],bf16>, 
!torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12526 = torch.operator "onnx.Add"(%528, %12525) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12527 = torch.operator "onnx.Shape"(%12524) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %12528 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12529 = torch.operator "onnx.Gather"(%12527, %12528) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12530 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12531 = torch.operator "onnx.Div"(%12529, %12530) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12532 = torch.operator "onnx.Cast"(%12531) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12533 = torch.operator "onnx.Cast"(%12532) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12534 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17076_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12535 = torch.operator "onnx.Unsqueeze"(%12520, %12534) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12536 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12537 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12538 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<_Constant_17080_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12539 = torch.operator "onnx.Unsqueeze"(%12533, %12538) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12540 = torch.operator "onnx.Concat"(%12535, %12536, %12537, %12539) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12541 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17083_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12542 = torch.operator "onnx.Unsqueeze"(%12520, %12541) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12543 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12544 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12545 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17087_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12546 = torch.operator "onnx.Unsqueeze"(%12533, %12545) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12547 = torch.operator "onnx.Concat"(%12542, %12543, %12544, %12546) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12548 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17090_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12549 = torch.operator "onnx.Unsqueeze"(%12520, %12548) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12550 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12551 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12552 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17094_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12553 = torch.operator "onnx.Unsqueeze"(%12533, %12552) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12554 = torch.operator "onnx.Concat"(%12549, %12550, %12551, %12553) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12555 = torch.operator "onnx.Reshape"(%12522, %12540) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12556 = torch.operator "onnx.Transpose"(%12555) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12557 = torch.operator "onnx.Reshape"(%12524, %12547) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12558 = torch.operator "onnx.Transpose"(%12557) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12559 = torch.operator "onnx.Reshape"(%12526, %12554) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12560 = torch.operator "onnx.Transpose"(%12559) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12561 = 
torch.operator "onnx.Cast"(%12556) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12562 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12563 = torch.operator "onnx.Pow"(%12561, %12562) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %12564 = torch.operator "onnx.ReduceMean"(%12563) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12565 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12566 = torch.operator "onnx.Add"(%12564, %12565) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12567 = torch.operator "onnx.Sqrt"(%12566) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12568 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12569 = torch.operator "onnx.Div"(%12568, %12567) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12570 = torch.operator "onnx.Cast"(%12556) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12571 = torch.operator "onnx.Mul"(%12570, %12569) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %12572 = torch.operator "onnx.Cast"(%12571) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %12573 = torch.operator "onnx.Mul"(%12572, %524) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %12574 = 
torch.operator "onnx.Cast"(%12558) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12575 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12576 = torch.operator "onnx.Pow"(%12574, %12575) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %12577 = torch.operator "onnx.ReduceMean"(%12576) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12578 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12579 = torch.operator "onnx.Add"(%12577, %12578) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12580 = torch.operator "onnx.Sqrt"(%12579) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12581 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %12582 = torch.operator "onnx.Div"(%12581, %12580) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %12583 = torch.operator "onnx.Cast"(%12558) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12584 = torch.operator "onnx.Mul"(%12583, %12582) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %12585 = torch.operator "onnx.Cast"(%12584) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %12586 = torch.operator "onnx.Mul"(%12585, %525) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %12587 = 
torch.operator "onnx.Shape"(%12573) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12588 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12589 = torch.operator "onnx.Gather"(%12587, %12588) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12590 = torch.operator "onnx.Shape"(%12573) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12591 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12592 = torch.operator "onnx.Gather"(%12590, %12591) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12593 = torch.operator "onnx.Shape"(%12573) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12594 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12595 = torch.operator "onnx.Gather"(%12593, %12594) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12596 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17138_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12597 = torch.operator "onnx.Unsqueeze"(%12589, %12596) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12598 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17140_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12599 = torch.operator "onnx.Unsqueeze"(%12592, %12598) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12600 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<_Constant_17142_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12601 = torch.operator "onnx.Unsqueeze"(%12595, %12600) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12602 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12603 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12604 = torch.operator "onnx.Concat"(%12597, %12599, %12601, %12602, %12603) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %12605 = torch.operator "onnx.Reshape"(%12573, %12604) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12606 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %12607:2 = torch.operator "onnx.Split"(%12605, %12606) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %12608 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12609 = torch.operator "onnx.Squeeze"(%12607#0, %12608) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12610 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_16_attr__value> : tensor<1xsi64>} : 
() -> !torch.vtensor<[1],si64> %12611 = torch.operator "onnx.Squeeze"(%12607#1, %12610) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12612 = torch.operator "onnx.Neg"(%12611) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12614 = torch.operator "onnx.Unsqueeze"(%12612, %12613) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12615 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12616 = torch.operator "onnx.Unsqueeze"(%12609, %12615) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12617 = torch.operator "onnx.Concat"(%12614, %12616) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12618 = torch.operator "onnx.Shape"(%12617) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %12619 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12620 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12621 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12622 = torch.operator "onnx.Slice"(%12618, %12620, %12621, %12619) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12623 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12624 = torch.operator "onnx.Concat"(%12622, %12623) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12625 = torch.operator "onnx.Reshape"(%12617, %12624) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12626 = torch.operator "onnx.Cast"(%12573) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %12627 = torch.operator "onnx.Mul"(%12626, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12628 = torch.operator "onnx.Cast"(%12625) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12629 = torch.operator "onnx.Mul"(%12628, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12630 = torch.operator "onnx.Add"(%12627, %12629) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12631 = torch.operator "onnx.Cast"(%12630) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %12632 = torch.operator "onnx.Shape"(%12586) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12633 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12634 = torch.operator "onnx.Gather"(%12632, %12633) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12635 = 
torch.operator "onnx.Shape"(%12586) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12636 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12637 = torch.operator "onnx.Gather"(%12635, %12636) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12638 = torch.operator "onnx.Shape"(%12586) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %12639 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12640 = torch.operator "onnx.Gather"(%12638, %12639) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12641 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17183_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12642 = torch.operator "onnx.Unsqueeze"(%12634, %12641) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12643 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17185_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12644 = torch.operator "onnx.Unsqueeze"(%12637, %12643) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12645 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17187_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12646 = torch.operator "onnx.Unsqueeze"(%12640, %12645) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12647 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12648 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12649 = torch.operator "onnx.Concat"(%12642, %12644, %12646, %12647, %12648) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %12650 = torch.operator "onnx.Reshape"(%12586, %12649) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12651 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %12652:2 = torch.operator "onnx.Split"(%12650, %12651) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %12653 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12654 = torch.operator "onnx.Squeeze"(%12652#0, %12653) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12655 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12656 = torch.operator "onnx.Squeeze"(%12652#1, %12655) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12657 = torch.operator "onnx.Neg"(%12656) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12658 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %12659 = torch.operator "onnx.Unsqueeze"(%12657, %12658) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12660 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12661 = torch.operator "onnx.Unsqueeze"(%12654, %12660) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12662 = torch.operator "onnx.Concat"(%12659, %12661) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12663 = torch.operator "onnx.Shape"(%12662) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %12664 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12665 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12667 = torch.operator "onnx.Slice"(%12663, %12665, %12666, %12664) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12668 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12669 = torch.operator "onnx.Concat"(%12667, %12668) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12670 = torch.operator "onnx.Reshape"(%12662, 
%12669) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12671 = torch.operator "onnx.Cast"(%12586) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %12672 = torch.operator "onnx.Mul"(%12671, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12673 = torch.operator "onnx.Cast"(%12670) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12674 = torch.operator "onnx.Mul"(%12673, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12675 = torch.operator "onnx.Add"(%12672, %12674) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12676 = torch.operator "onnx.Cast"(%12675) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %12677 = torch.operator "onnx.Shape"(%12631) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %12678 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12679 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12680 = torch.operator "onnx.Slice"(%12677, %12678, %12679) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12681 = torch.operator "onnx.Cast"(%12680) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %12682 = torch.operator "onnx.Sqrt"(%12681) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12683 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %12684 = torch.operator "onnx.Cast"(%12682) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %12685 = torch.operator "onnx.Div"(%12683, %12684) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %12686 = torch.operator "onnx.Cast"(%12685) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %12687 = torch.operator "onnx.Transpose"(%12676) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %12688 = torch.operator "onnx.Sqrt"(%12686) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12689 = torch.operator "onnx.Mul"(%12631, %12688) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %12690 = torch.operator "onnx.Sqrt"(%12686) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12691 = torch.operator "onnx.Mul"(%12687, %12690) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %12692 = torch.operator "onnx.MatMul"(%12689, %12691) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %12693 = torch.operator "onnx.Softmax"(%12692) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %12694 = torch.operator "onnx.MatMul"(%12693, %12560) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %12695 = torch.operator "onnx.Transpose"(%12694) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %12696 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.14_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12697 = torch.operator "onnx.Mul"(%12533, %12696) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17240_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12699 = torch.operator "onnx.Unsqueeze"(%12520, %12698) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12700 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12701 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17243_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12702 = torch.operator "onnx.Unsqueeze"(%12697, %12701) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12703 = torch.operator "onnx.Concat"(%12699, %12700, %12702) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12704 = torch.operator "onnx.Reshape"(%12695, %12703) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %12705 = torch.operator "onnx.Cast"(%12704) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %12706 = torch.operator "onnx.Concat"(%12705, %12517) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %12707 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.14_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12708 = torch.operator "onnx.Unsqueeze"(%12491, %12707) : 
(!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12709 = torch.operator "onnx.MatMul"(%12706, %1043) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12710 = torch.operator "onnx.Add"(%523, %12709) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12711 = torch.operator "onnx.Mul"(%12708, %12710) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12712 = torch.operator "onnx.Add"(%12473, %12711) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12713 = torch.operator "onnx.Gemm"(%1285, %529, %530) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %12714 = torch.operator "onnx.Shape"(%12713) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %12715 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12716 = torch.operator "onnx.Gather"(%12714, %12715) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12717 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12718 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12719 = torch.operator "onnx.Add"(%12716, %12718) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12720 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ^ Body of %12720, whose `%12720 =` is at the end of the previous line.
//
// single_transformer_blocks.15 / norm: cut the [1,9216] tensor %12713 into three
// consecutive slices along the axes given by %12715, then LayerNorm + modulate.
// %12721 = %12719 / %12720 is the common slice step; the slice bounds are
// %12721 multiplied by constants 4, 5 and 6 (values are opaque resources).
%12721 = torch.operator "onnx.Div"(%12719, %12720) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12722 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12723 = torch.operator "onnx.Mul"(%12721, %12722) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// Slice 1: [%12717, %12723). Added to the normalized activations in %12741.
%12724 = torch.operator "onnx.Slice"(%12713, %12717, %12723, %12715) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%12725 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12726 = torch.operator "onnx.Mul"(%12721, %12725) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// Slice 2: [%12723, %12726). Used as the multiplicative term below (%12735..%12738).
%12727 = torch.operator "onnx.Slice"(%12713, %12723, %12726, %12715) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%12728 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12729 = torch.operator "onnx.Mul"(%12721, %12728) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// Slice 3: [%12726, %12729). Not consumed within this span.
%12730 = torch.operator "onnx.Slice"(%12713, %12726, %12729, %12715) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
// LayerNormalization scale and bias are loaded from module-level globals.
%_2Fsingle_transformer_blocks.152Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.15/norm/norm/Constant_attr__value" : tensor<3072xbf16>
%12731 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.152Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%_2Fsingle_transformer_blocks.152Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.15/norm/norm/Constant_1_attr__value" : tensor<3072xbf16>
%12732 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.152Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// Normalize %12712 over the last axis.
%12733 = torch.operator "onnx.LayerNormalization"(%12712, %12731, %12732) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12734 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// [?,?] -> [?,1,?] so slice 2 broadcasts across the 4608 axis.
%12735 = torch.operator "onnx.Unsqueeze"(%12727, %12734) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Fix: rank-0 bf16 constant; the attribute type was a bare `tensor` and is now
// `tensor<bf16>`, matching the result type.
// NOTE(review): the value is presumably 1.0 (a "1 + scale" modulation) — confirm against the resource blob.
%12736 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%12737 = torch.operator "onnx.Add"(%12735, %12736) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%12738 = torch.operator "onnx.Mul"(%12733, %12737) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12739 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// Slice 1 is unsqueezed the same way; it is added to %12738 by the next op (%12741).
%12740 = torch.operator "onnx.Unsqueeze"(%12724, %12739) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// single_transformer_blocks.15: finish the modulated norm, then compute the MLP
// branch and the three attention projections from the same input %12741.
// %12741 = %12738 + %12740 (the [?,1,?] term broadcasts over the 4608 axis).
%12741 = torch.operator "onnx.Add"(%12738, %12740) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// --- MLP branch: 3072 -> 12288 projection (weight %1044, bias %531). ---
%12742 = torch.operator "onnx.MatMul"(%12741, %1044) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12743 = torch.operator "onnx.Add"(%531, %12742) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// Activation on x = %12743, computed as c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))).
// This has the structure of the tanh GELU approximation. c0..c3 are opaque
// resources (presumably 0.044715, sqrt(2/pi), 1.0, 0.5 — confirm).
// Fix: all four rank-0 bf16 constants had a bare `tensor` attribute type; they
// are now `tensor<bf16>`, matching each op's result type.
%12744 = torch.operator "onnx.Mul"(%12743, %12743) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12745 = torch.operator "onnx.Mul"(%12743, %12744) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12746 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%12747 = torch.operator "onnx.Mul"(%12746, %12745) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12748 = torch.operator "onnx.Add"(%12743, %12747) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12749 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%12750 = torch.operator "onnx.Mul"(%12749, %12748) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12751 = torch.operator "onnx.Tanh"(%12750) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12752 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%12753 = torch.operator "onnx.Add"(%12752, %12751) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12754 = torch.operator "onnx.Mul"(%12743, %12753) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%12755 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%12756 = torch.operator "onnx.Mul"(%12755, %12754) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>

// --- Attention branch: read one entry of shape(%12741) for later reshapes. ---
%12757 = torch.operator "onnx.Shape"(%12741) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
// Fix: rank-0 si64 gather index; bare `tensor` is now `tensor<si64>`.
%12758 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12759 = torch.operator "onnx.Gather"(%12757, %12758) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three 3072 -> 3072 projections of %12741 (weights %1045/%1046/%1047, biases
// %535/%536/%537). They are later normalized with norm_q (%12761), normalized
// with norm_k (%12763), and used as the second attention matmul operand (%12765).
%12760 = torch.operator "onnx.MatMul"(%12741, %1045) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12761 = torch.operator "onnx.Add"(%535, %12760) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12762 = torch.operator "onnx.MatMul"(%12741, %1046) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12763 = torch.operator "onnx.Add"(%536, %12762) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%12764 = torch.operator "onnx.MatMul"(%12741, %1047) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// The second operand type and the result type of %12765 continue on the next line.
%12765 = torch.operator "onnx.Add"(%537, %12764) : (!torch.vtensor<[3072],bf16>,
!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// ^ Remaining operand/result types of %12765, which begins on the previous line.
//
// single_transformer_blocks.15 / attn: derive a per-head size from shape(%12763)
// and start assembling the 4-element reshape target.
%12766 = torch.operator "onnx.Shape"(%12763) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
// Fix: both rank-0 si64 constants below had a bare `tensor` attribute type and
// are now `tensor<si64>`, matching their `!torch.vtensor<[],si64>` results.
%12767 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12768 = torch.operator "onnx.Gather"(%12766, %12767) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// NOTE(review): presumably %12768 is the feature size (3072) and %12769 the head count — confirm against the resource blobs.
%12769 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12770 = torch.operator "onnx.Div"(%12768, %12769) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Two si64 -> si64 casts: no type change, kept because later ops use %12772.
%12771 = torch.operator "onnx.Cast"(%12770) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12772 = torch.operator "onnx.Cast"(%12771) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Shape entries: %12774 is the gathered dim %12759, %12775 and %12776 are
// constants, and %12778 (next line) is the quotient %12772.
%12773 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17315_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12774 = torch.operator "onnx.Unsqueeze"(%12759, %12773) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12775 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12776 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12777 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17319_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The operand/result types of %12778 continue on the next line.
%12778 = torch.operator "onnx.Unsqueeze"(%12772, %12777) :
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12779 = torch.operator "onnx.Concat"(%12774, %12775, %12776, %12778) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12780 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17322_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12781 = torch.operator "onnx.Unsqueeze"(%12759, %12780) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12782 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12784 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17326_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12785 = torch.operator "onnx.Unsqueeze"(%12772, %12784) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12786 = torch.operator "onnx.Concat"(%12781, %12782, %12783, %12785) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12787 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17329_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12788 = torch.operator "onnx.Unsqueeze"(%12759, %12787) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12789 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%12790 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12791 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17333_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12792 = torch.operator "onnx.Unsqueeze"(%12772, %12791) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12793 = torch.operator "onnx.Concat"(%12788, %12789, %12790, %12792) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12794 = torch.operator "onnx.Reshape"(%12761, %12779) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12795 = torch.operator "onnx.Transpose"(%12794) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12796 = torch.operator "onnx.Reshape"(%12763, %12786) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12797 = torch.operator "onnx.Transpose"(%12796) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12798 = torch.operator "onnx.Reshape"(%12765, %12793) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12799 = torch.operator "onnx.Transpose"(%12798) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12800 = torch.operator "onnx.Cast"(%12795) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12801 = torch.operator "onnx.Constant"() 
{torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// ^ Attribute and type of the "onnx.Constant" %12801 that begins on the previous line.
//   Fix: this and the two other rank-0 f32 constants below had a bare `tensor`
//   attribute type; they are now `tensor<f32>`, matching each op's result type.
//
// single_transformer_blocks.15 / attn / norm_q, computed in f32 on %12800 (the
// f32 cast of %12795): c2 / sqrt(mean(x ^ c0, last axis) + c1), multiplied back
// onto x, cast to bf16, then scaled by the [128] weight %533.
// NOTE(review): this has the shape of an RMS norm with c0 = 2, c1 = eps, c2 = 1 — confirm the constants.
%12802 = torch.operator "onnx.Pow"(%12800, %12801) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%12803 = torch.operator "onnx.ReduceMean"(%12802) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12804 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12805 = torch.operator "onnx.Add"(%12803, %12804) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12806 = torch.operator "onnx.Sqrt"(%12805) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12807 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12808 = torch.operator "onnx.Div"(%12807, %12806) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// %12795 is cast to f32 a second time (%12800 is the same cast); kept as emitted.
%12809 = torch.operator "onnx.Cast"(%12795) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12810 = torch.operator "onnx.Mul"(%12809, %12808) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%12811 = torch.operator "onnx.Cast"(%12810) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
// Broadcasting against the [128] weight fixes the last dimension to 128.
%12812 = torch.operator "onnx.Mul"(%12811, %533) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Start of the matching norm_k computation on %12797.
%12813 = torch.operator "onnx.Cast"(%12797) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
// The attribute and type of %12814 continue on the next line.
%12814 = torch.operator "onnx.Constant"()
{torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// ^ Attribute and type of the "onnx.Constant" %12814 that begins on the previous line.
//   Fix: this and the two other rank-0 f32 constants below had a bare `tensor`
//   attribute type; they are now `tensor<f32>`, matching each op's result type.
//
// single_transformer_blocks.15 / attn / norm_k, same op sequence as norm_q but
// on %12813 (the f32 cast of %12797) and with the [128] weight %534:
// c2 / sqrt(mean(x ^ c0, last axis) + c1), multiplied onto x, cast to bf16,
// scaled by %534.
// NOTE(review): presumably an RMS norm with c0 = 2, c1 = eps, c2 = 1 — confirm the constants.
%12815 = torch.operator "onnx.Pow"(%12813, %12814) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%12816 = torch.operator "onnx.ReduceMean"(%12815) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12817 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12818 = torch.operator "onnx.Add"(%12816, %12817) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12819 = torch.operator "onnx.Sqrt"(%12818) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%12820 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%12821 = torch.operator "onnx.Div"(%12820, %12819) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// %12797 is cast to f32 a second time (%12813 is the same cast); kept as emitted.
%12822 = torch.operator "onnx.Cast"(%12797) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12823 = torch.operator "onnx.Mul"(%12822, %12821) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%12824 = torch.operator "onnx.Cast"(%12823) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%12825 = torch.operator "onnx.Mul"(%12824, %534) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// First of three shape reads on the normalized q tensor %12812.
%12826 = torch.operator "onnx.Shape"(%12812) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
// The resource attribute and type of %12827 continue on the next line.
%12827 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__single_transformer_blocks.15_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
// ^ Tail of the "onnx.Constant" %12827 that begins on the previous line.
//   Fix: the three rank-0 si64 gather indices on this line (Constant_9/10/11)
//   had a bare `tensor` attribute type; they are now `tensor<si64>`.
//
// single_transformer_blocks.15 / attn: read three entries of shape(%12812) (the
// normalized q tensor) and unsqueeze each to [1]. They become the leading
// entries of the 5-element reshape target assembled by the ops that follow.
// Shape is re-evaluated before each Gather, as emitted by the exporter.
%12828 = torch.operator "onnx.Gather"(%12826, %12827) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12829 = torch.operator "onnx.Shape"(%12812) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12830 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12831 = torch.operator "onnx.Gather"(%12829, %12830) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12832 = torch.operator "onnx.Shape"(%12812) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12833 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12834 = torch.operator "onnx.Gather"(%12832, %12833) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12835 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17377_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12836 = torch.operator "onnx.Unsqueeze"(%12828, %12835) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12837 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17379_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12838 = torch.operator "onnx.Unsqueeze"(%12831, %12837) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12839 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17381_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The operand/result types of %12840 continue on the next line.
%12840 = torch.operator "onnx.Unsqueeze"(%12834, %12839) :
(!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12841 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12842 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12843 = torch.operator "onnx.Concat"(%12836, %12838, %12840, %12841, %12842) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %12844 = torch.operator "onnx.Reshape"(%12812, %12843) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12845 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %12846:2 = torch.operator "onnx.Split"(%12844, %12845) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %12847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12848 = torch.operator "onnx.Squeeze"(%12846#0, %12847) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12849 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12850 = torch.operator "onnx.Squeeze"(%12846#1, %12849) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?],bf16> %12851 = torch.operator "onnx.Neg"(%12850) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12852 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12853 = torch.operator "onnx.Unsqueeze"(%12851, %12852) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12854 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12855 = torch.operator "onnx.Unsqueeze"(%12848, %12854) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12856 = torch.operator "onnx.Concat"(%12853, %12855) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12857 = torch.operator "onnx.Shape"(%12856) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %12858 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12859 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12860 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12861 = torch.operator "onnx.Slice"(%12857, %12859, %12860, %12858) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12862 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.15_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// ^ Tail of the "onnx.Constant" %12862 that begins on the previous line.
//
// single_transformer_blocks.15 / attn: fold the pair-swapped q tensor %12856
// ([..., 2]) back to 4-D, then combine in f32:
//   %12869 = q * %1455 + swapped(q) * %1456
// where %1455 and %1456 are [1,1,4608,128] f32 tables defined earlier.
// NOTE(review): this matches a rotary position embedding with %1455 = cos and %1456 = sin — confirm at their definitions.
%12863 = torch.operator "onnx.Concat"(%12861, %12862) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%12864 = torch.operator "onnx.Reshape"(%12856, %12863) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%12865 = torch.operator "onnx.Cast"(%12812) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%12866 = torch.operator "onnx.Mul"(%12865, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%12867 = torch.operator "onnx.Cast"(%12864) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%12868 = torch.operator "onnx.Mul"(%12867, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%12869 = torch.operator "onnx.Add"(%12866, %12868) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
// Back to bf16; %12870 is the q operand of the attention matmul further down.
%12870 = torch.operator "onnx.Cast"(%12869) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>

// Start of the same processing for the normalized k tensor %12825: shape reads.
%12871 = torch.operator "onnx.Shape"(%12825) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
// Fix: rank-0 si64 gather index; bare `tensor` is now `tensor<si64>`.
%12872 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12873 = torch.operator "onnx.Gather"(%12871, %12872) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12874 = torch.operator "onnx.Shape"(%12825) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
// The attribute and type of %12875 continue on the next line.
%12875 = torch.operator "onnx.Constant"()
{torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
// ^ Attribute and type of the "onnx.Constant" %12875 that begins on the previous line.
//   Fix: the rank-0 si64 gather indices Constant_24 and Constant_25 had a bare
//   `tensor` attribute type; they are now `tensor<si64>`.
//
// single_transformer_blocks.15 / attn: read entries of shape(%12825) (the
// normalized k tensor) and unsqueeze them to [1]. Together with the constants
// %12886 and %12887 they form the 5-element reshape target for k.
%12876 = torch.operator "onnx.Gather"(%12874, %12875) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%12877 = torch.operator "onnx.Shape"(%12825) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%12878 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%12879 = torch.operator "onnx.Gather"(%12877, %12878) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// %12873 is the first gathered dim, produced on the previous line.
%12880 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17422_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12881 = torch.operator "onnx.Unsqueeze"(%12873, %12880) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12882 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17424_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12883 = torch.operator "onnx.Unsqueeze"(%12876, %12882) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%12884 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17426_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%12885 = torch.operator "onnx.Unsqueeze"(%12879, %12884) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// Trailing shape entries. The reshape result type [?,?,?,?,2] fixes the last one to 2.
%12886 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The result type of %12887 continues on the next line.
%12887 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_27_attr__value> : tensor<1xsi64>} : () ->
!torch.vtensor<[1],si64> %12888 = torch.operator "onnx.Concat"(%12881, %12883, %12885, %12886, %12887) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %12889 = torch.operator "onnx.Reshape"(%12825, %12888) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12890 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %12891:2 = torch.operator "onnx.Split"(%12889, %12890) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %12892 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12893 = torch.operator "onnx.Squeeze"(%12891#0, %12892) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12894 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12895 = torch.operator "onnx.Squeeze"(%12891#1, %12894) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12896 = torch.operator "onnx.Neg"(%12895) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %12897 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12898 = torch.operator "onnx.Unsqueeze"(%12896, %12897) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> 
!torch.vtensor<[?,?,?,?,1],bf16> %12899 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12900 = torch.operator "onnx.Unsqueeze"(%12893, %12899) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %12901 = torch.operator "onnx.Concat"(%12898, %12900) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %12902 = torch.operator "onnx.Shape"(%12901) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %12903 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12904 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12905 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12906 = torch.operator "onnx.Slice"(%12902, %12904, %12905, %12903) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12907 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12908 = torch.operator "onnx.Concat"(%12906, %12907) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %12909 = torch.operator "onnx.Reshape"(%12901, %12908) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %12910 = 
torch.operator "onnx.Cast"(%12825) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %12911 = torch.operator "onnx.Mul"(%12910, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12912 = torch.operator "onnx.Cast"(%12909) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %12913 = torch.operator "onnx.Mul"(%12912, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12914 = torch.operator "onnx.Add"(%12911, %12913) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %12915 = torch.operator "onnx.Cast"(%12914) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %12916 = torch.operator "onnx.Shape"(%12870) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %12917 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12918 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12919 = torch.operator "onnx.Slice"(%12916, %12917, %12918) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12920 = torch.operator "onnx.Cast"(%12919) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %12921 = torch.operator "onnx.Sqrt"(%12920) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12922 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %12923 = 
torch.operator "onnx.Cast"(%12921) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %12924 = torch.operator "onnx.Div"(%12922, %12923) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %12925 = torch.operator "onnx.Cast"(%12924) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %12926 = torch.operator "onnx.Transpose"(%12915) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %12927 = torch.operator "onnx.Sqrt"(%12925) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12928 = torch.operator "onnx.Mul"(%12870, %12927) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %12929 = torch.operator "onnx.Sqrt"(%12925) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %12930 = torch.operator "onnx.Mul"(%12926, %12929) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %12931 = torch.operator "onnx.MatMul"(%12928, %12930) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %12932 = torch.operator "onnx.Softmax"(%12931) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %12933 = torch.operator "onnx.MatMul"(%12932, %12799) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %12934 = torch.operator "onnx.Transpose"(%12933) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %12935 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12936 = torch.operator "onnx.Mul"(%12772, %12935) : 
(!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12937 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17479_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12938 = torch.operator "onnx.Unsqueeze"(%12759, %12937) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12939 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12940 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17482_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12941 = torch.operator "onnx.Unsqueeze"(%12936, %12940) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12942 = torch.operator "onnx.Concat"(%12938, %12939, %12941) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %12943 = torch.operator "onnx.Reshape"(%12934, %12942) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %12944 = torch.operator "onnx.Cast"(%12943) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %12945 = torch.operator "onnx.Concat"(%12944, %12756) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %12946 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.15_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12947 = torch.operator "onnx.Unsqueeze"(%12730, %12946) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12948 = torch.operator "onnx.MatMul"(%12945, %1048) : (!torch.vtensor<[?,4608,?],bf16>, 
!torch.vtensor<[15360,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12949 = torch.operator "onnx.Add"(%532, %12948) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12950 = torch.operator "onnx.Mul"(%12947, %12949) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12951 = torch.operator "onnx.Add"(%12712, %12950) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12952 = torch.operator "onnx.Gemm"(%1285, %538, %539) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %12953 = torch.operator "onnx.Shape"(%12952) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %12954 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12955 = torch.operator "onnx.Gather"(%12953, %12954) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12956 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12957 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12958 = torch.operator "onnx.Add"(%12955, %12957) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12959 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> 
%12960 = torch.operator "onnx.Div"(%12958, %12959) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12961 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12962 = torch.operator "onnx.Mul"(%12960, %12961) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12963 = torch.operator "onnx.Slice"(%12952, %12956, %12962, %12954) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %12964 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12965 = torch.operator "onnx.Mul"(%12960, %12964) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12966 = torch.operator "onnx.Slice"(%12952, %12962, %12965, %12954) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %12967 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12968 = torch.operator "onnx.Mul"(%12960, %12967) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %12969 = torch.operator "onnx.Slice"(%12952, %12965, %12968, %12954) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.162Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.16/norm/norm/Constant_attr__value" : tensor<3072xbf16> %12970 = torch_c.from_builtin_tensor 
%_2Fsingle_transformer_blocks.162Fnorm2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.162Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.16/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %12971 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.162Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %12972 = torch.operator "onnx.LayerNormalization"(%12951, %12970, %12971) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12973 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12974 = torch.operator "onnx.Unsqueeze"(%12966, %12973) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12975 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12976 = torch.operator "onnx.Add"(%12974, %12975) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %12977 = torch.operator "onnx.Mul"(%12972, %12976) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %12978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %12979 = torch.operator "onnx.Unsqueeze"(%12963, %12978) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %12980 = torch.operator "onnx.Add"(%12977, %12979) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %12981 = torch.operator "onnx.MatMul"(%12980, %1049) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12982 = torch.operator "onnx.Add"(%540, %12981) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12983 = torch.operator "onnx.Mul"(%12982, %12982) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12984 = torch.operator "onnx.Mul"(%12982, %12983) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12986 = torch.operator "onnx.Mul"(%12985, %12984) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12987 = torch.operator "onnx.Add"(%12982, %12986) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12988 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12989 = torch.operator "onnx.Mul"(%12988, %12987) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12990 = torch.operator "onnx.Tanh"(%12989) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12991 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12992 = torch.operator "onnx.Add"(%12991, %12990) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12993 
= torch.operator "onnx.Mul"(%12982, %12992) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12994 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %12995 = torch.operator "onnx.Mul"(%12994, %12993) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %12996 = torch.operator "onnx.Shape"(%12980) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %12997 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %12998 = torch.operator "onnx.Gather"(%12996, %12997) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %12999 = torch.operator "onnx.MatMul"(%12980, %1050) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13000 = torch.operator "onnx.Add"(%544, %12999) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13001 = torch.operator "onnx.MatMul"(%12980, %1051) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13002 = torch.operator "onnx.Add"(%545, %13001) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13003 = torch.operator "onnx.MatMul"(%12980, %1052) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13004 = torch.operator "onnx.Add"(%546, %13003) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13005 = torch.operator "onnx.Shape"(%13002) : (!torch.vtensor<[?,4608,3072],bf16>) -> 
!torch.vtensor<[3],si64> %13006 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13007 = torch.operator "onnx.Gather"(%13005, %13006) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13008 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13009 = torch.operator "onnx.Div"(%13007, %13008) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13010 = torch.operator "onnx.Cast"(%13009) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13011 = torch.operator "onnx.Cast"(%13010) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13012 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17554_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13013 = torch.operator "onnx.Unsqueeze"(%12998, %13012) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13014 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13015 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13016 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17558_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13017 = torch.operator "onnx.Unsqueeze"(%13011, %13016) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13018 = torch.operator "onnx.Concat"(%13013, %13014, %13015, %13017) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13019 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17561_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13020 = torch.operator "onnx.Unsqueeze"(%12998, %13019) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13021 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13022 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13023 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17565_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13024 = torch.operator "onnx.Unsqueeze"(%13011, %13023) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13025 = torch.operator "onnx.Concat"(%13020, %13021, %13022, %13024) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13026 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17568_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13027 = torch.operator "onnx.Unsqueeze"(%12998, %13026) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13028 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13029 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> 
!torch.vtensor<[1],si64> %13030 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17572_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13031 = torch.operator "onnx.Unsqueeze"(%13011, %13030) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13032 = torch.operator "onnx.Concat"(%13027, %13028, %13029, %13031) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13033 = torch.operator "onnx.Reshape"(%13000, %13018) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13034 = torch.operator "onnx.Transpose"(%13033) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13035 = torch.operator "onnx.Reshape"(%13002, %13025) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13036 = torch.operator "onnx.Transpose"(%13035) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13037 = torch.operator "onnx.Reshape"(%13004, %13032) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13038 = torch.operator "onnx.Transpose"(%13037) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13039 = torch.operator "onnx.Cast"(%13034) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13040 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13041 = 
torch.operator "onnx.Pow"(%13039, %13040) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13042 = torch.operator "onnx.ReduceMean"(%13041) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13043 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13044 = torch.operator "onnx.Add"(%13042, %13043) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13045 = torch.operator "onnx.Sqrt"(%13044) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13046 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13047 = torch.operator "onnx.Div"(%13046, %13045) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13048 = torch.operator "onnx.Cast"(%13034) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13049 = torch.operator "onnx.Mul"(%13048, %13047) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13050 = torch.operator "onnx.Cast"(%13049) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %13051 = torch.operator "onnx.Mul"(%13050, %542) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %13052 = torch.operator "onnx.Cast"(%13036) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13053 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13054 = 
torch.operator "onnx.Pow"(%13052, %13053) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13055 = torch.operator "onnx.ReduceMean"(%13054) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13056 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13057 = torch.operator "onnx.Add"(%13055, %13056) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13058 = torch.operator "onnx.Sqrt"(%13057) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13059 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13060 = torch.operator "onnx.Div"(%13059, %13058) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13061 = torch.operator "onnx.Cast"(%13036) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13062 = torch.operator "onnx.Mul"(%13061, %13060) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13063 = torch.operator "onnx.Cast"(%13062) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %13064 = torch.operator "onnx.Mul"(%13063, %543) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %13065 = torch.operator "onnx.Shape"(%13051) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13066 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13067 = torch.operator "onnx.Gather"(%13065, %13066) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13068 = torch.operator "onnx.Shape"(%13051) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13069 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13070 = torch.operator "onnx.Gather"(%13068, %13069) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13071 = torch.operator "onnx.Shape"(%13051) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13072 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13073 = torch.operator "onnx.Gather"(%13071, %13072) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13074 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17616_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13075 = torch.operator "onnx.Unsqueeze"(%13067, %13074) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13076 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17618_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13077 = torch.operator "onnx.Unsqueeze"(%13070, %13076) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13078 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17620_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13079 = torch.operator "onnx.Unsqueeze"(%13073, %13078) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13080 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.16_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13081 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13082 = torch.operator "onnx.Concat"(%13075, %13077, %13079, %13080, %13081) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %13083 = torch.operator "onnx.Reshape"(%13051, %13082) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13084 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %13085:2 = torch.operator "onnx.Split"(%13083, %13084) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %13086 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13087 = torch.operator "onnx.Squeeze"(%13085#0, %13086) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13088 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13089 = torch.operator "onnx.Squeeze"(%13085#1, %13088) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13090 = torch.operator "onnx.Neg"(%13089) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13091 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13092 = torch.operator "onnx.Unsqueeze"(%13090, %13091) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13093 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13094 = torch.operator "onnx.Unsqueeze"(%13087, %13093) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13095 = torch.operator "onnx.Concat"(%13092, %13094) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13096 = torch.operator "onnx.Shape"(%13095) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %13097 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13098 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13099 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13100 = torch.operator "onnx.Slice"(%13096, %13098, %13099, %13097) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13101 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13102 = torch.operator "onnx.Concat"(%13100, %13101) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13103 = torch.operator "onnx.Reshape"(%13095, %13102) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13104 = torch.operator "onnx.Cast"(%13051) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %13105 = torch.operator "onnx.Mul"(%13104, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13106 = torch.operator "onnx.Cast"(%13103) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13107 = torch.operator "onnx.Mul"(%13106, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13108 = torch.operator "onnx.Add"(%13105, %13107) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13109 = torch.operator "onnx.Cast"(%13108) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %13110 = torch.operator "onnx.Shape"(%13064) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13111 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13112 = torch.operator "onnx.Gather"(%13110, %13111) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13113 = torch.operator "onnx.Shape"(%13064) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13114 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13115 = torch.operator 
"onnx.Gather"(%13113, %13114) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13116 = torch.operator "onnx.Shape"(%13064) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13117 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13118 = torch.operator "onnx.Gather"(%13116, %13117) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13119 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17661_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13120 = torch.operator "onnx.Unsqueeze"(%13112, %13119) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13121 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17663_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13122 = torch.operator "onnx.Unsqueeze"(%13115, %13121) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13123 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17665_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13124 = torch.operator "onnx.Unsqueeze"(%13118, %13123) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13125 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13126 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13127 = torch.operator "onnx.Concat"(%13120, %13122, %13124, %13125, %13126) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %13128 = torch.operator "onnx.Reshape"(%13064, %13127) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13129 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %13130:2 = torch.operator "onnx.Split"(%13128, %13129) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %13131 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13132 = torch.operator "onnx.Squeeze"(%13130#0, %13131) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13133 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13134 = torch.operator "onnx.Squeeze"(%13130#1, %13133) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13135 = torch.operator "onnx.Neg"(%13134) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13136 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13137 = torch.operator "onnx.Unsqueeze"(%13135, %13136) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13138 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.16_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13139 = torch.operator "onnx.Unsqueeze"(%13132, %13138) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13140 = torch.operator "onnx.Concat"(%13137, %13139) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13141 = torch.operator "onnx.Shape"(%13140) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %13142 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13143 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13145 = torch.operator "onnx.Slice"(%13141, %13143, %13144, %13142) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13146 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13147 = torch.operator "onnx.Concat"(%13145, %13146) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13148 = torch.operator "onnx.Reshape"(%13140, %13147) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13149 = torch.operator "onnx.Cast"(%13064) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %13150 = torch.operator "onnx.Mul"(%13149, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13151 = torch.operator "onnx.Cast"(%13148) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13152 = torch.operator "onnx.Mul"(%13151, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13153 = torch.operator "onnx.Add"(%13150, %13152) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13154 = torch.operator "onnx.Cast"(%13153) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %13155 = torch.operator "onnx.Shape"(%13109) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %13156 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13157 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13158 = torch.operator "onnx.Slice"(%13155, %13156, %13157) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13159 = torch.operator "onnx.Cast"(%13158) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %13160 = torch.operator "onnx.Sqrt"(%13159) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13161 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %13162 = torch.operator "onnx.Cast"(%13160) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %13163 = torch.operator "onnx.Div"(%13161, %13162) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %13164 = torch.operator "onnx.Cast"(%13163) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %13165 = torch.operator "onnx.Transpose"(%13154) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %13166 = torch.operator "onnx.Sqrt"(%13164) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13167 = torch.operator "onnx.Mul"(%13109, %13166) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %13168 = torch.operator "onnx.Sqrt"(%13164) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13169 = torch.operator "onnx.Mul"(%13165, %13168) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %13170 = torch.operator "onnx.MatMul"(%13167, %13169) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %13171 = torch.operator "onnx.Softmax"(%13170) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %13172 = torch.operator "onnx.MatMul"(%13171, %13038) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %13173 = torch.operator "onnx.Transpose"(%13172) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %13174 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13175 = torch.operator "onnx.Mul"(%13011, %13174) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %13176 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17718_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13177 = torch.operator "onnx.Unsqueeze"(%12998, %13176) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13178 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17721_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13180 = torch.operator "onnx.Unsqueeze"(%13175, %13179) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13181 = torch.operator "onnx.Concat"(%13177, %13178, %13180) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13182 = torch.operator "onnx.Reshape"(%13173, %13181) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %13183 = torch.operator "onnx.Cast"(%13182) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %13184 = torch.operator "onnx.Concat"(%13183, %12995) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %13185 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.16_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13186 = torch.operator "onnx.Unsqueeze"(%12969, %13185) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13187 = torch.operator "onnx.MatMul"(%13184, %1053) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %13188 = torch.operator "onnx.Add"(%541, %13187) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13189 = torch.operator "onnx.Mul"(%13186, %13188) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13190 = torch.operator "onnx.Add"(%12951, %13189) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13191 = torch.operator "onnx.Gemm"(%1285, %547, %548) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %13192 = torch.operator "onnx.Shape"(%13191) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %13193 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13194 = torch.operator "onnx.Gather"(%13192, %13193) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13195 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13196 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13197 = torch.operator "onnx.Add"(%13194, %13196) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13198 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13199 = torch.operator 
"onnx.Div"(%13197, %13198) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13200 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13201 = torch.operator "onnx.Mul"(%13199, %13200) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13202 = torch.operator "onnx.Slice"(%13191, %13195, %13201, %13193) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %13203 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13204 = torch.operator "onnx.Mul"(%13199, %13203) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13205 = torch.operator "onnx.Slice"(%13191, %13201, %13204, %13193) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %13206 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13207 = torch.operator "onnx.Mul"(%13199, %13206) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13208 = torch.operator "onnx.Slice"(%13191, %13204, %13207, %13193) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.172Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.17/norm/norm/Constant_attr__value" : tensor<3072xbf16> %13209 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.172Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.172Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.17/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %13210 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.172Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %13211 = torch.operator "onnx.LayerNormalization"(%13190, %13209, %13210) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13212 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13213 = torch.operator "onnx.Unsqueeze"(%13205, %13212) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13214 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13215 = torch.operator "onnx.Add"(%13213, %13214) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %13216 = torch.operator "onnx.Mul"(%13211, %13215) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13218 = torch.operator "onnx.Unsqueeze"(%13202, %13217) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13219 = torch.operator "onnx.Add"(%13216, %13218) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13220 = torch.operator 
"onnx.MatMul"(%13219, %1054) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13221 = torch.operator "onnx.Add"(%549, %13220) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13222 = torch.operator "onnx.Mul"(%13221, %13221) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13223 = torch.operator "onnx.Mul"(%13221, %13222) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13225 = torch.operator "onnx.Mul"(%13224, %13223) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13226 = torch.operator "onnx.Add"(%13221, %13225) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13227 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13228 = torch.operator "onnx.Mul"(%13227, %13226) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13229 = torch.operator "onnx.Tanh"(%13228) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13230 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13231 = torch.operator "onnx.Add"(%13230, %13229) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13232 = torch.operator "onnx.Mul"(%13221, %13231) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13233 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13234 = torch.operator "onnx.Mul"(%13233, %13232) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13235 = torch.operator "onnx.Shape"(%13219) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %13236 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13237 = torch.operator "onnx.Gather"(%13235, %13236) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13238 = torch.operator "onnx.MatMul"(%13219, %1055) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13239 = torch.operator "onnx.Add"(%553, %13238) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13240 = torch.operator "onnx.MatMul"(%13219, %1056) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13241 = torch.operator "onnx.Add"(%554, %13240) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13242 = torch.operator "onnx.MatMul"(%13219, %1057) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13243 = torch.operator "onnx.Add"(%555, %13242) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13244 = torch.operator "onnx.Shape"(%13241) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %13245 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13246 = torch.operator "onnx.Gather"(%13244, %13245) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13247 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13248 = torch.operator "onnx.Div"(%13246, %13247) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13249 = torch.operator "onnx.Cast"(%13248) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13250 = torch.operator "onnx.Cast"(%13249) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13251 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17793_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13252 = torch.operator "onnx.Unsqueeze"(%13237, %13251) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13253 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13254 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13255 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17797_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13256 = torch.operator "onnx.Unsqueeze"(%13250, %13255) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13257 = torch.operator "onnx.Concat"(%13252, %13253, %13254, %13256) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13258 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17800_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13259 = torch.operator "onnx.Unsqueeze"(%13237, %13258) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13260 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13261 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13262 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17804_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13263 = torch.operator "onnx.Unsqueeze"(%13250, %13262) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13264 = torch.operator "onnx.Concat"(%13259, %13260, %13261, %13263) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13265 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17807_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13266 = torch.operator "onnx.Unsqueeze"(%13237, %13265) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13267 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13268 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13269 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17811_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13270 = torch.operator "onnx.Unsqueeze"(%13250, %13269) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13271 = torch.operator "onnx.Concat"(%13266, %13267, %13268, %13270) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13272 = torch.operator "onnx.Reshape"(%13239, %13257) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13273 = torch.operator "onnx.Transpose"(%13272) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13274 = torch.operator "onnx.Reshape"(%13241, %13264) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13275 = torch.operator "onnx.Transpose"(%13274) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13276 = torch.operator "onnx.Reshape"(%13243, %13271) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13277 = torch.operator "onnx.Transpose"(%13276) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13278 = torch.operator "onnx.Cast"(%13273) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13279 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13280 = torch.operator "onnx.Pow"(%13278, %13279) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13281 = torch.operator "onnx.ReduceMean"(%13280) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13282 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13283 = torch.operator "onnx.Add"(%13281, %13282) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13284 = torch.operator "onnx.Sqrt"(%13283) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13285 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13286 = torch.operator "onnx.Div"(%13285, %13284) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13287 = torch.operator "onnx.Cast"(%13273) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13288 = torch.operator "onnx.Mul"(%13287, %13286) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13289 = torch.operator "onnx.Cast"(%13288) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %13290 = torch.operator "onnx.Mul"(%13289, %551) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %13291 = torch.operator "onnx.Cast"(%13275) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13292 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13293 = torch.operator "onnx.Pow"(%13291, %13292) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13294 = torch.operator "onnx.ReduceMean"(%13293) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13295 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13296 = torch.operator "onnx.Add"(%13294, %13295) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13297 = torch.operator "onnx.Sqrt"(%13296) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13298 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13299 = torch.operator "onnx.Div"(%13298, %13297) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13300 = torch.operator "onnx.Cast"(%13275) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13301 = torch.operator "onnx.Mul"(%13300, %13299) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13302 = torch.operator "onnx.Cast"(%13301) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %13303 = torch.operator "onnx.Mul"(%13302, %552) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %13304 = torch.operator "onnx.Shape"(%13290) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13305 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13306 = torch.operator "onnx.Gather"(%13304, %13305) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13307 = torch.operator "onnx.Shape"(%13290) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13308 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13309 = torch.operator "onnx.Gather"(%13307, %13308) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13310 = torch.operator "onnx.Shape"(%13290) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13311 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13312 = torch.operator "onnx.Gather"(%13310, %13311) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13313 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17855_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13314 = torch.operator "onnx.Unsqueeze"(%13306, %13313) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13315 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17857_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13316 = torch.operator "onnx.Unsqueeze"(%13309, %13315) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13317 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17859_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13318 = torch.operator "onnx.Unsqueeze"(%13312, %13317) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13319 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.17_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13320 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13321 = torch.operator "onnx.Concat"(%13314, %13316, %13318, %13319, %13320) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %13322 = torch.operator "onnx.Reshape"(%13290, %13321) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13323 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %13324:2 = torch.operator "onnx.Split"(%13322, %13323) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %13325 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13326 = torch.operator "onnx.Squeeze"(%13324#0, %13325) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13327 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13328 = torch.operator "onnx.Squeeze"(%13324#1, %13327) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13329 = torch.operator "onnx.Neg"(%13328) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13330 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13331 = torch.operator "onnx.Unsqueeze"(%13329, %13330) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13332 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13333 = torch.operator "onnx.Unsqueeze"(%13326, %13332) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13334 = torch.operator "onnx.Concat"(%13331, %13333) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13335 = torch.operator "onnx.Shape"(%13334) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %13336 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13337 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13338 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13339 = torch.operator "onnx.Slice"(%13335, %13337, %13338, %13336) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13340 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13341 = torch.operator "onnx.Concat"(%13339, %13340) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13342 = torch.operator "onnx.Reshape"(%13334, %13341) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13343 = torch.operator "onnx.Cast"(%13290) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %13344 = torch.operator "onnx.Mul"(%13343, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13345 = torch.operator "onnx.Cast"(%13342) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13346 = torch.operator "onnx.Mul"(%13345, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13347 = torch.operator "onnx.Add"(%13344, %13346) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13348 = torch.operator "onnx.Cast"(%13347) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %13349 = torch.operator "onnx.Shape"(%13303) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13350 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13351 = torch.operator "onnx.Gather"(%13349, %13350) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13352 = torch.operator "onnx.Shape"(%13303) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13353 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13354 = torch.operator 
"onnx.Gather"(%13352, %13353) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13355 = torch.operator "onnx.Shape"(%13303) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13356 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13357 = torch.operator "onnx.Gather"(%13355, %13356) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13358 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17900_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13359 = torch.operator "onnx.Unsqueeze"(%13351, %13358) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13360 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17902_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13361 = torch.operator "onnx.Unsqueeze"(%13354, %13360) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13362 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17904_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13363 = torch.operator "onnx.Unsqueeze"(%13357, %13362) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13364 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13365 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13366 = torch.operator "onnx.Concat"(%13359, %13361, %13363, %13364, %13365) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %13367 = torch.operator "onnx.Reshape"(%13303, %13366) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13368 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %13369:2 = torch.operator "onnx.Split"(%13367, %13368) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %13370 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13371 = torch.operator "onnx.Squeeze"(%13369#0, %13370) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13372 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13373 = torch.operator "onnx.Squeeze"(%13369#1, %13372) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13374 = torch.operator "onnx.Neg"(%13373) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13375 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13376 = torch.operator "onnx.Unsqueeze"(%13374, %13375) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13377 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.17_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13378 = torch.operator "onnx.Unsqueeze"(%13371, %13377) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13379 = torch.operator "onnx.Concat"(%13376, %13378) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13380 = torch.operator "onnx.Shape"(%13379) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %13381 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13382 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13383 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13384 = torch.operator "onnx.Slice"(%13380, %13382, %13383, %13381) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13385 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13386 = torch.operator "onnx.Concat"(%13384, %13385) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13387 = torch.operator "onnx.Reshape"(%13379, %13386) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13388 = torch.operator "onnx.Cast"(%13303) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %13389 = torch.operator "onnx.Mul"(%13388, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13390 = torch.operator "onnx.Cast"(%13387) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13391 = torch.operator "onnx.Mul"(%13390, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13392 = torch.operator "onnx.Add"(%13389, %13391) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13393 = torch.operator "onnx.Cast"(%13392) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %13394 = torch.operator "onnx.Shape"(%13348) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %13395 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13396 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13397 = torch.operator "onnx.Slice"(%13394, %13395, %13396) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13398 = torch.operator "onnx.Cast"(%13397) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %13399 = torch.operator "onnx.Sqrt"(%13398) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13400 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %13401 = torch.operator "onnx.Cast"(%13399) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %13402 = torch.operator "onnx.Div"(%13400, %13401) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %13403 = torch.operator "onnx.Cast"(%13402) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %13404 = torch.operator "onnx.Transpose"(%13393) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %13405 = torch.operator "onnx.Sqrt"(%13403) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13406 = torch.operator "onnx.Mul"(%13348, %13405) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %13407 = torch.operator "onnx.Sqrt"(%13403) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13408 = torch.operator "onnx.Mul"(%13404, %13407) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %13409 = torch.operator "onnx.MatMul"(%13406, %13408) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %13410 = torch.operator "onnx.Softmax"(%13409) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %13411 = torch.operator "onnx.MatMul"(%13410, %13277) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %13412 = torch.operator "onnx.Transpose"(%13411) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %13413 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13414 = torch.operator "onnx.Mul"(%13250, %13413) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %13415 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17957_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13416 = torch.operator "onnx.Unsqueeze"(%13237, %13415) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13417 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13418 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_17960_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13419 = torch.operator "onnx.Unsqueeze"(%13414, %13418) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13420 = torch.operator "onnx.Concat"(%13416, %13417, %13419) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13421 = torch.operator "onnx.Reshape"(%13412, %13420) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %13422 = torch.operator "onnx.Cast"(%13421) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %13423 = torch.operator "onnx.Concat"(%13422, %13234) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %13424 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.17_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13425 = torch.operator "onnx.Unsqueeze"(%13208, %13424) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13426 = torch.operator "onnx.MatMul"(%13423, %1058) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %13427 = torch.operator "onnx.Add"(%550, %13426) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13428 = torch.operator "onnx.Mul"(%13425, %13427) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13429 = torch.operator "onnx.Add"(%13190, %13428) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13430 = torch.operator "onnx.Gemm"(%1285, %556, %557) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %13431 = torch.operator "onnx.Shape"(%13430) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %13432 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13433 = torch.operator "onnx.Gather"(%13431, %13432) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13434 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13435 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13436 = torch.operator "onnx.Add"(%13433, %13435) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13437 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13438 = torch.operator 
"onnx.Div"(%13436, %13437) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13439 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13440 = torch.operator "onnx.Mul"(%13438, %13439) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13441 = torch.operator "onnx.Slice"(%13430, %13434, %13440, %13432) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %13442 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13443 = torch.operator "onnx.Mul"(%13438, %13442) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13444 = torch.operator "onnx.Slice"(%13430, %13440, %13443, %13432) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %13445 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13446 = torch.operator "onnx.Mul"(%13438, %13445) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13447 = torch.operator "onnx.Slice"(%13430, %13443, %13446, %13432) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.182Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.18/norm/norm/Constant_attr__value" : tensor<3072xbf16> %13448 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.182Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.182Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.18/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %13449 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.182Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %13450 = torch.operator "onnx.LayerNormalization"(%13429, %13448, %13449) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13451 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13452 = torch.operator "onnx.Unsqueeze"(%13444, %13451) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13453 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13454 = torch.operator "onnx.Add"(%13452, %13453) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %13455 = torch.operator "onnx.Mul"(%13450, %13454) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13456 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13457 = torch.operator "onnx.Unsqueeze"(%13441, %13456) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13458 = torch.operator "onnx.Add"(%13455, %13457) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13459 = torch.operator 
"onnx.MatMul"(%13458, %1059) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13460 = torch.operator "onnx.Add"(%558, %13459) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13461 = torch.operator "onnx.Mul"(%13460, %13460) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13462 = torch.operator "onnx.Mul"(%13460, %13461) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13463 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13464 = torch.operator "onnx.Mul"(%13463, %13462) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13465 = torch.operator "onnx.Add"(%13460, %13464) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13466 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13467 = torch.operator "onnx.Mul"(%13466, %13465) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13468 = torch.operator "onnx.Tanh"(%13467) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13469 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13470 = torch.operator "onnx.Add"(%13469, %13468) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13471 = torch.operator "onnx.Mul"(%13460, %13470) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13472 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13473 = torch.operator "onnx.Mul"(%13472, %13471) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13474 = torch.operator "onnx.Shape"(%13458) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %13475 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13476 = torch.operator "onnx.Gather"(%13474, %13475) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13477 = torch.operator "onnx.MatMul"(%13458, %1060) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13478 = torch.operator "onnx.Add"(%562, %13477) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13479 = torch.operator "onnx.MatMul"(%13458, %1061) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13480 = torch.operator "onnx.Add"(%563, %13479) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13481 = torch.operator "onnx.MatMul"(%13458, %1062) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13482 = torch.operator "onnx.Add"(%564, %13481) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13483 = torch.operator "onnx.Shape"(%13480) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %13484 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13485 = torch.operator "onnx.Gather"(%13483, %13484) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13486 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13487 = torch.operator "onnx.Div"(%13485, %13486) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13488 = torch.operator "onnx.Cast"(%13487) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13489 = torch.operator "onnx.Cast"(%13488) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13490 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18032_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13491 = torch.operator "onnx.Unsqueeze"(%13476, %13490) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13492 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13493 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13494 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18036_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13495 = torch.operator "onnx.Unsqueeze"(%13489, %13494) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13496 = torch.operator "onnx.Concat"(%13491, %13492, %13493, %13495) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13497 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18039_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13498 = torch.operator "onnx.Unsqueeze"(%13476, %13497) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13499 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13501 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18043_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13502 = torch.operator "onnx.Unsqueeze"(%13489, %13501) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13503 = torch.operator "onnx.Concat"(%13498, %13499, %13500, %13502) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13504 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18046_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13505 = torch.operator "onnx.Unsqueeze"(%13476, %13504) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13506 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13507 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13508 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18050_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13509 = torch.operator "onnx.Unsqueeze"(%13489, %13508) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13510 = torch.operator "onnx.Concat"(%13505, %13506, %13507, %13509) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13511 = torch.operator "onnx.Reshape"(%13478, %13496) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13512 = torch.operator "onnx.Transpose"(%13511) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13513 = torch.operator "onnx.Reshape"(%13480, %13503) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13514 = torch.operator "onnx.Transpose"(%13513) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13515 = torch.operator "onnx.Reshape"(%13482, %13510) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13516 = torch.operator "onnx.Transpose"(%13515) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13517 = torch.operator "onnx.Cast"(%13512) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13518 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13519 = torch.operator "onnx.Pow"(%13517, %13518) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13520 = torch.operator "onnx.ReduceMean"(%13519) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13521 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13522 = torch.operator "onnx.Add"(%13520, %13521) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13523 = torch.operator "onnx.Sqrt"(%13522) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13524 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13525 = torch.operator "onnx.Div"(%13524, %13523) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13526 = torch.operator "onnx.Cast"(%13512) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13527 = torch.operator "onnx.Mul"(%13526, %13525) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13528 = torch.operator "onnx.Cast"(%13527) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %13529 = torch.operator "onnx.Mul"(%13528, %560) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %13530 = torch.operator "onnx.Cast"(%13514) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13531 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13532 = torch.operator "onnx.Pow"(%13530, %13531) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13533 = torch.operator "onnx.ReduceMean"(%13532) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13534 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13535 = torch.operator "onnx.Add"(%13533, %13534) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13536 = torch.operator "onnx.Sqrt"(%13535) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13537 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13538 = torch.operator "onnx.Div"(%13537, %13536) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13539 = torch.operator "onnx.Cast"(%13514) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13540 = torch.operator "onnx.Mul"(%13539, %13538) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13541 = torch.operator "onnx.Cast"(%13540) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %13542 = torch.operator "onnx.Mul"(%13541, %561) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %13543 = torch.operator "onnx.Shape"(%13529) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13544 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13545 = torch.operator "onnx.Gather"(%13543, %13544) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13546 = torch.operator "onnx.Shape"(%13529) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13547 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13548 = torch.operator "onnx.Gather"(%13546, %13547) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13549 = torch.operator "onnx.Shape"(%13529) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13550 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13551 = torch.operator "onnx.Gather"(%13549, %13550) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13552 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18094_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13553 = torch.operator "onnx.Unsqueeze"(%13545, %13552) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13554 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18096_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13555 = torch.operator "onnx.Unsqueeze"(%13548, %13554) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13556 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18098_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13557 = torch.operator "onnx.Unsqueeze"(%13551, %13556) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13558 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.18_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13559 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13560 = torch.operator "onnx.Concat"(%13553, %13555, %13557, %13558, %13559) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %13561 = torch.operator "onnx.Reshape"(%13529, %13560) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13562 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %13563:2 = torch.operator "onnx.Split"(%13561, %13562) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %13564 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13565 = torch.operator "onnx.Squeeze"(%13563#0, %13564) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13566 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13567 = torch.operator "onnx.Squeeze"(%13563#1, %13566) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13568 = torch.operator "onnx.Neg"(%13567) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13569 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13570 = torch.operator "onnx.Unsqueeze"(%13568, %13569) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13571 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13572 = torch.operator "onnx.Unsqueeze"(%13565, %13571) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13573 = torch.operator "onnx.Concat"(%13570, %13572) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13574 = torch.operator "onnx.Shape"(%13573) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %13575 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13576 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13577 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13578 = torch.operator "onnx.Slice"(%13574, %13576, %13577, %13575) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13579 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13580 = torch.operator "onnx.Concat"(%13578, %13579) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13581 = torch.operator "onnx.Reshape"(%13573, %13580) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13582 = torch.operator "onnx.Cast"(%13529) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %13583 = torch.operator "onnx.Mul"(%13582, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13584 = torch.operator "onnx.Cast"(%13581) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13585 = torch.operator "onnx.Mul"(%13584, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13586 = torch.operator "onnx.Add"(%13583, %13585) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13587 = torch.operator "onnx.Cast"(%13586) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %13588 = torch.operator "onnx.Shape"(%13542) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13589 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13590 = torch.operator "onnx.Gather"(%13588, %13589) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13591 = torch.operator "onnx.Shape"(%13542) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13592 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13593 = torch.operator 
"onnx.Gather"(%13591, %13592) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13594 = torch.operator "onnx.Shape"(%13542) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13595 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13596 = torch.operator "onnx.Gather"(%13594, %13595) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13597 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18139_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13598 = torch.operator "onnx.Unsqueeze"(%13590, %13597) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13599 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18141_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13600 = torch.operator "onnx.Unsqueeze"(%13593, %13599) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13601 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18143_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13602 = torch.operator "onnx.Unsqueeze"(%13596, %13601) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13603 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13604 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13605 = torch.operator "onnx.Concat"(%13598, %13600, %13602, %13603, %13604) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %13606 = torch.operator "onnx.Reshape"(%13542, %13605) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13607 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %13608:2 = torch.operator "onnx.Split"(%13606, %13607) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %13609 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13610 = torch.operator "onnx.Squeeze"(%13608#0, %13609) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13611 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13612 = torch.operator "onnx.Squeeze"(%13608#1, %13611) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13613 = torch.operator "onnx.Neg"(%13612) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13614 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13615 = torch.operator "onnx.Unsqueeze"(%13613, %13614) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13616 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.18_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13617 = torch.operator "onnx.Unsqueeze"(%13610, %13616) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13618 = torch.operator "onnx.Concat"(%13615, %13617) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13619 = torch.operator "onnx.Shape"(%13618) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %13620 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13621 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13622 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13623 = torch.operator "onnx.Slice"(%13619, %13621, %13622, %13620) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13624 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13625 = torch.operator "onnx.Concat"(%13623, %13624) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13626 = torch.operator "onnx.Reshape"(%13618, %13625) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13627 = torch.operator "onnx.Cast"(%13542) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %13628 = torch.operator "onnx.Mul"(%13627, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13629 = torch.operator "onnx.Cast"(%13626) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13630 = torch.operator "onnx.Mul"(%13629, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13631 = torch.operator "onnx.Add"(%13628, %13630) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13632 = torch.operator "onnx.Cast"(%13631) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %13633 = torch.operator "onnx.Shape"(%13587) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %13634 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13635 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13636 = torch.operator "onnx.Slice"(%13633, %13634, %13635) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13637 = torch.operator "onnx.Cast"(%13636) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %13638 = torch.operator "onnx.Sqrt"(%13637) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13639 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %13640 = torch.operator "onnx.Cast"(%13638) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %13641 = torch.operator "onnx.Div"(%13639, %13640) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %13642 = torch.operator "onnx.Cast"(%13641) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %13643 = torch.operator "onnx.Transpose"(%13632) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %13644 = torch.operator "onnx.Sqrt"(%13642) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13645 = torch.operator "onnx.Mul"(%13587, %13644) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %13646 = torch.operator "onnx.Sqrt"(%13642) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13647 = torch.operator "onnx.Mul"(%13643, %13646) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %13648 = torch.operator "onnx.MatMul"(%13645, %13647) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %13649 = torch.operator "onnx.Softmax"(%13648) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %13650 = torch.operator "onnx.MatMul"(%13649, %13516) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %13651 = torch.operator "onnx.Transpose"(%13650) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %13652 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13653 = torch.operator "onnx.Mul"(%13489, %13652) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %13654 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18196_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13655 = torch.operator "onnx.Unsqueeze"(%13476, %13654) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13656 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13657 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18199_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13658 = torch.operator "onnx.Unsqueeze"(%13653, %13657) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13659 = torch.operator "onnx.Concat"(%13655, %13656, %13658) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13660 = torch.operator "onnx.Reshape"(%13651, %13659) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %13661 = torch.operator "onnx.Cast"(%13660) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %13662 = torch.operator "onnx.Concat"(%13661, %13473) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %13663 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.18_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13664 = torch.operator "onnx.Unsqueeze"(%13447, %13663) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13665 = torch.operator "onnx.MatMul"(%13662, %1063) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %13666 = torch.operator "onnx.Add"(%559, %13665) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13667 = torch.operator "onnx.Mul"(%13664, %13666) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13668 = torch.operator "onnx.Add"(%13429, %13667) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13669 = torch.operator "onnx.Gemm"(%1285, %565, %566) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %13670 = torch.operator "onnx.Shape"(%13669) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %13671 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13672 = torch.operator "onnx.Gather"(%13670, %13671) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13673 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13674 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13675 = torch.operator "onnx.Add"(%13672, %13674) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13676 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13677 = torch.operator 
"onnx.Div"(%13675, %13676) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13678 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13679 = torch.operator "onnx.Mul"(%13677, %13678) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13680 = torch.operator "onnx.Slice"(%13669, %13673, %13679, %13671) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %13681 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13682 = torch.operator "onnx.Mul"(%13677, %13681) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13683 = torch.operator "onnx.Slice"(%13669, %13679, %13682, %13671) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %13684 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13685 = torch.operator "onnx.Mul"(%13677, %13684) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13686 = torch.operator "onnx.Slice"(%13669, %13682, %13685, %13671) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.192Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.19/norm/norm/Constant_attr__value" : tensor<3072xbf16> %13687 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.192Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.192Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.19/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %13688 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.192Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %13689 = torch.operator "onnx.LayerNormalization"(%13668, %13687, %13688) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13690 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13691 = torch.operator "onnx.Unsqueeze"(%13683, %13690) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13692 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13693 = torch.operator "onnx.Add"(%13691, %13692) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %13694 = torch.operator "onnx.Mul"(%13689, %13693) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13695 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13696 = torch.operator "onnx.Unsqueeze"(%13680, %13695) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13697 = torch.operator "onnx.Add"(%13694, %13696) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13698 = torch.operator 
"onnx.MatMul"(%13697, %1064) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13699 = torch.operator "onnx.Add"(%567, %13698) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13700 = torch.operator "onnx.Mul"(%13699, %13699) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13701 = torch.operator "onnx.Mul"(%13699, %13700) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13703 = torch.operator "onnx.Mul"(%13702, %13701) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13704 = torch.operator "onnx.Add"(%13699, %13703) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13705 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13706 = torch.operator "onnx.Mul"(%13705, %13704) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13707 = torch.operator "onnx.Tanh"(%13706) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13708 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13709 = torch.operator "onnx.Add"(%13708, %13707) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13710 = torch.operator "onnx.Mul"(%13699, %13709) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13711 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13712 = torch.operator "onnx.Mul"(%13711, %13710) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13713 = torch.operator "onnx.Shape"(%13697) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %13714 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13715 = torch.operator "onnx.Gather"(%13713, %13714) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13716 = torch.operator "onnx.MatMul"(%13697, %1065) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13717 = torch.operator "onnx.Add"(%571, %13716) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13718 = torch.operator "onnx.MatMul"(%13697, %1066) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13719 = torch.operator "onnx.Add"(%572, %13718) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13720 = torch.operator "onnx.MatMul"(%13697, %1067) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13721 = torch.operator "onnx.Add"(%573, %13720) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13722 = torch.operator "onnx.Shape"(%13719) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %13723 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13724 = torch.operator "onnx.Gather"(%13722, %13723) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13725 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13726 = torch.operator "onnx.Div"(%13724, %13725) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13727 = torch.operator "onnx.Cast"(%13726) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13728 = torch.operator "onnx.Cast"(%13727) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13729 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18271_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13730 = torch.operator "onnx.Unsqueeze"(%13715, %13729) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13731 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13732 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13733 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18275_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13734 = torch.operator "onnx.Unsqueeze"(%13728, %13733) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13735 = torch.operator "onnx.Concat"(%13730, %13731, %13732, %13734) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13736 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18278_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13737 = torch.operator "onnx.Unsqueeze"(%13715, %13736) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13738 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13739 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13740 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18282_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13741 = torch.operator "onnx.Unsqueeze"(%13728, %13740) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13742 = torch.operator "onnx.Concat"(%13737, %13738, %13739, %13741) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13743 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18285_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13744 = torch.operator "onnx.Unsqueeze"(%13715, %13743) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13745 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13746 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13747 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18289_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13748 = torch.operator "onnx.Unsqueeze"(%13728, %13747) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13749 = torch.operator "onnx.Concat"(%13744, %13745, %13746, %13748) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13750 = torch.operator "onnx.Reshape"(%13717, %13735) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13751 = torch.operator "onnx.Transpose"(%13750) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13752 = torch.operator "onnx.Reshape"(%13719, %13742) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13753 = torch.operator "onnx.Transpose"(%13752) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13754 = torch.operator "onnx.Reshape"(%13721, %13749) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13755 = torch.operator "onnx.Transpose"(%13754) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13756 = torch.operator "onnx.Cast"(%13751) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13757 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13758 = torch.operator "onnx.Pow"(%13756, %13757) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13759 = torch.operator "onnx.ReduceMean"(%13758) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13760 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13761 = torch.operator "onnx.Add"(%13759, %13760) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13762 = torch.operator "onnx.Sqrt"(%13761) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13763 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13764 = torch.operator "onnx.Div"(%13763, %13762) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13765 = torch.operator "onnx.Cast"(%13751) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13766 = torch.operator "onnx.Mul"(%13765, %13764) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13767 = torch.operator "onnx.Cast"(%13766) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %13768 = torch.operator "onnx.Mul"(%13767, %569) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %13769 = torch.operator "onnx.Cast"(%13753) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13770 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13771 = torch.operator "onnx.Pow"(%13769, %13770) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13772 = torch.operator "onnx.ReduceMean"(%13771) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13773 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13774 = torch.operator "onnx.Add"(%13772, %13773) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13775 = torch.operator "onnx.Sqrt"(%13774) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13776 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13777 = torch.operator "onnx.Div"(%13776, %13775) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13778 = torch.operator "onnx.Cast"(%13753) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13779 = torch.operator "onnx.Mul"(%13778, %13777) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13780 = torch.operator "onnx.Cast"(%13779) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %13781 = torch.operator "onnx.Mul"(%13780, %570) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %13782 = torch.operator "onnx.Shape"(%13768) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13784 = torch.operator "onnx.Gather"(%13782, %13783) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13785 = torch.operator "onnx.Shape"(%13768) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13786 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13787 = torch.operator "onnx.Gather"(%13785, %13786) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13788 = torch.operator "onnx.Shape"(%13768) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13789 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13790 = torch.operator "onnx.Gather"(%13788, %13789) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13791 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18333_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13792 = torch.operator "onnx.Unsqueeze"(%13784, %13791) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13793 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18335_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13794 = torch.operator "onnx.Unsqueeze"(%13787, %13793) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13795 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18337_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13796 = torch.operator "onnx.Unsqueeze"(%13790, %13795) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13797 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.19_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13798 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13799 = torch.operator "onnx.Concat"(%13792, %13794, %13796, %13797, %13798) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %13800 = torch.operator "onnx.Reshape"(%13768, %13799) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13801 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %13802:2 = torch.operator "onnx.Split"(%13800, %13801) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %13803 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13804 = torch.operator "onnx.Squeeze"(%13802#0, %13803) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13805 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13806 = torch.operator "onnx.Squeeze"(%13802#1, %13805) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13807 = torch.operator "onnx.Neg"(%13806) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13808 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13809 = torch.operator "onnx.Unsqueeze"(%13807, %13808) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13810 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13811 = torch.operator "onnx.Unsqueeze"(%13804, %13810) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13812 = torch.operator "onnx.Concat"(%13809, %13811) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13813 = torch.operator "onnx.Shape"(%13812) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %13814 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13815 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13816 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13817 = torch.operator "onnx.Slice"(%13813, %13815, %13816, %13814) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13818 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13819 = torch.operator "onnx.Concat"(%13817, %13818) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13820 = torch.operator "onnx.Reshape"(%13812, %13819) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13821 = torch.operator "onnx.Cast"(%13768) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %13822 = torch.operator "onnx.Mul"(%13821, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13823 = torch.operator "onnx.Cast"(%13820) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13824 = torch.operator "onnx.Mul"(%13823, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13825 = torch.operator "onnx.Add"(%13822, %13824) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13826 = torch.operator "onnx.Cast"(%13825) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %13827 = torch.operator "onnx.Shape"(%13781) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13828 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13829 = torch.operator "onnx.Gather"(%13827, %13828) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13830 = torch.operator "onnx.Shape"(%13781) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13831 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13832 = torch.operator 
"onnx.Gather"(%13830, %13831) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13833 = torch.operator "onnx.Shape"(%13781) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %13834 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13835 = torch.operator "onnx.Gather"(%13833, %13834) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13836 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18378_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13837 = torch.operator "onnx.Unsqueeze"(%13829, %13836) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13838 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18380_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13839 = torch.operator "onnx.Unsqueeze"(%13832, %13838) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13840 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18382_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13841 = torch.operator "onnx.Unsqueeze"(%13835, %13840) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13842 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13843 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13844 = torch.operator "onnx.Concat"(%13837, %13839, %13841, %13842, %13843) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %13845 = torch.operator "onnx.Reshape"(%13781, %13844) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13846 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %13847:2 = torch.operator "onnx.Split"(%13845, %13846) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %13848 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13849 = torch.operator "onnx.Squeeze"(%13847#0, %13848) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13850 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13851 = torch.operator "onnx.Squeeze"(%13847#1, %13850) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13852 = torch.operator "onnx.Neg"(%13851) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13853 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13854 = torch.operator "onnx.Unsqueeze"(%13852, %13853) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13855 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.19_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13856 = torch.operator "onnx.Unsqueeze"(%13849, %13855) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %13857 = torch.operator "onnx.Concat"(%13854, %13856) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %13858 = torch.operator "onnx.Shape"(%13857) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %13859 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13860 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13861 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13862 = torch.operator "onnx.Slice"(%13858, %13860, %13861, %13859) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13863 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13864 = torch.operator "onnx.Concat"(%13862, %13863) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13865 = torch.operator "onnx.Reshape"(%13857, %13864) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13866 = torch.operator "onnx.Cast"(%13781) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %13867 = torch.operator "onnx.Mul"(%13866, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13868 = torch.operator "onnx.Cast"(%13865) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13869 = torch.operator "onnx.Mul"(%13868, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13870 = torch.operator "onnx.Add"(%13867, %13869) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %13871 = torch.operator "onnx.Cast"(%13870) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %13872 = torch.operator "onnx.Shape"(%13826) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %13873 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13874 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13875 = torch.operator "onnx.Slice"(%13872, %13873, %13874) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13876 = torch.operator "onnx.Cast"(%13875) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %13877 = torch.operator "onnx.Sqrt"(%13876) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13878 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %13879 = torch.operator "onnx.Cast"(%13877) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %13880 = torch.operator "onnx.Div"(%13878, %13879) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %13881 = torch.operator "onnx.Cast"(%13880) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %13882 = torch.operator "onnx.Transpose"(%13871) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %13883 = torch.operator "onnx.Sqrt"(%13881) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13884 = torch.operator "onnx.Mul"(%13826, %13883) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %13885 = torch.operator "onnx.Sqrt"(%13881) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %13886 = torch.operator "onnx.Mul"(%13882, %13885) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %13887 = torch.operator "onnx.MatMul"(%13884, %13886) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %13888 = torch.operator "onnx.Softmax"(%13887) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %13889 = torch.operator "onnx.MatMul"(%13888, %13755) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %13890 = torch.operator "onnx.Transpose"(%13889) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %13891 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13892 = torch.operator "onnx.Mul"(%13728, %13891) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %13893 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18435_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13894 = torch.operator "onnx.Unsqueeze"(%13715, %13893) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13895 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13896 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18438_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13897 = torch.operator "onnx.Unsqueeze"(%13892, %13896) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13898 = torch.operator "onnx.Concat"(%13894, %13895, %13897) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %13899 = torch.operator "onnx.Reshape"(%13890, %13898) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %13900 = torch.operator "onnx.Cast"(%13899) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %13901 = torch.operator "onnx.Concat"(%13900, %13712) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %13902 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.19_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13903 = torch.operator "onnx.Unsqueeze"(%13686, %13902) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13904 = torch.operator "onnx.MatMul"(%13901, %1068) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %13905 = torch.operator "onnx.Add"(%568, %13904) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13906 = torch.operator "onnx.Mul"(%13903, %13905) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13907 = torch.operator "onnx.Add"(%13668, %13906) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13908 = torch.operator "onnx.Gemm"(%1285, %574, %575) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %13909 = torch.operator "onnx.Shape"(%13908) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %13910 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13911 = torch.operator "onnx.Gather"(%13909, %13910) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13912 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13913 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13914 = torch.operator "onnx.Add"(%13911, %13913) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13915 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13916 = torch.operator 
"onnx.Div"(%13914, %13915) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13917 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13918 = torch.operator "onnx.Mul"(%13916, %13917) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13919 = torch.operator "onnx.Slice"(%13908, %13912, %13918, %13910) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %13920 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13921 = torch.operator "onnx.Mul"(%13916, %13920) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13922 = torch.operator "onnx.Slice"(%13908, %13918, %13921, %13910) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %13923 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13924 = torch.operator "onnx.Mul"(%13916, %13923) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13925 = torch.operator "onnx.Slice"(%13908, %13921, %13924, %13910) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.202Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.20/norm/norm/Constant_attr__value" : tensor<3072xbf16> %13926 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.202Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.202Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.20/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %13927 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.202Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %13928 = torch.operator "onnx.LayerNormalization"(%13907, %13926, %13927) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13929 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13930 = torch.operator "onnx.Unsqueeze"(%13922, %13929) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13931 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13932 = torch.operator "onnx.Add"(%13930, %13931) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %13933 = torch.operator "onnx.Mul"(%13928, %13932) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13934 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13935 = torch.operator "onnx.Unsqueeze"(%13919, %13934) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %13936 = torch.operator "onnx.Add"(%13933, %13935) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13937 = torch.operator 
"onnx.MatMul"(%13936, %1069) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13938 = torch.operator "onnx.Add"(%576, %13937) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13939 = torch.operator "onnx.Mul"(%13938, %13938) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13940 = torch.operator "onnx.Mul"(%13938, %13939) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13941 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13942 = torch.operator "onnx.Mul"(%13941, %13940) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13943 = torch.operator "onnx.Add"(%13938, %13942) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13944 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13945 = torch.operator "onnx.Mul"(%13944, %13943) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13946 = torch.operator "onnx.Tanh"(%13945) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13947 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13948 = torch.operator "onnx.Add"(%13947, %13946) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13949 = torch.operator "onnx.Mul"(%13938, %13948) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13950 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %13951 = torch.operator "onnx.Mul"(%13950, %13949) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %13952 = torch.operator "onnx.Shape"(%13936) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %13953 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13954 = torch.operator "onnx.Gather"(%13952, %13953) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13955 = torch.operator "onnx.MatMul"(%13936, %1070) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13956 = torch.operator "onnx.Add"(%580, %13955) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13957 = torch.operator "onnx.MatMul"(%13936, %1071) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13958 = torch.operator "onnx.Add"(%581, %13957) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13959 = torch.operator "onnx.MatMul"(%13936, %1072) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13960 = torch.operator "onnx.Add"(%582, %13959) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %13961 = torch.operator "onnx.Shape"(%13958) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %13962 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13963 = torch.operator "onnx.Gather"(%13961, %13962) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13964 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %13965 = torch.operator "onnx.Div"(%13963, %13964) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13966 = torch.operator "onnx.Cast"(%13965) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13967 = torch.operator "onnx.Cast"(%13966) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %13968 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18510_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13969 = torch.operator "onnx.Unsqueeze"(%13954, %13968) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13970 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13971 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13972 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18514_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13973 = torch.operator "onnx.Unsqueeze"(%13967, %13972) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13974 = torch.operator "onnx.Concat"(%13969, %13970, %13971, %13973) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13975 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18517_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13976 = torch.operator "onnx.Unsqueeze"(%13954, %13975) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13977 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13979 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18521_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13980 = torch.operator "onnx.Unsqueeze"(%13967, %13979) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13981 = torch.operator "onnx.Concat"(%13976, %13977, %13978, %13980) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13982 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18524_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13983 = torch.operator "onnx.Unsqueeze"(%13954, %13982) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13984 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13986 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18528_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %13987 = torch.operator "onnx.Unsqueeze"(%13967, %13986) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %13988 = torch.operator "onnx.Concat"(%13983, %13984, %13985, %13987) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %13989 = torch.operator "onnx.Reshape"(%13956, %13974) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13990 = torch.operator "onnx.Transpose"(%13989) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13991 = torch.operator "onnx.Reshape"(%13958, %13981) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13992 = torch.operator "onnx.Transpose"(%13991) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13993 = torch.operator "onnx.Reshape"(%13960, %13988) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %13994 = torch.operator "onnx.Transpose"(%13993) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %13995 = torch.operator "onnx.Cast"(%13990) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %13996 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %13997 = torch.operator "onnx.Pow"(%13995, %13996) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %13998 = torch.operator "onnx.ReduceMean"(%13997) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %13999 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14000 = torch.operator "onnx.Add"(%13998, %13999) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14001 = torch.operator "onnx.Sqrt"(%14000) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14002 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14003 = torch.operator "onnx.Div"(%14002, %14001) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14004 = torch.operator "onnx.Cast"(%13990) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14005 = torch.operator "onnx.Mul"(%14004, %14003) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14006 = torch.operator "onnx.Cast"(%14005) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14007 = torch.operator "onnx.Mul"(%14006, %578) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14008 = torch.operator "onnx.Cast"(%13992) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14009 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14010 = torch.operator "onnx.Pow"(%14008, %14009) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14011 = torch.operator "onnx.ReduceMean"(%14010) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14012 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14013 = torch.operator "onnx.Add"(%14011, %14012) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14014 = torch.operator "onnx.Sqrt"(%14013) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14015 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14016 = torch.operator "onnx.Div"(%14015, %14014) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14017 = torch.operator "onnx.Cast"(%13992) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14018 = torch.operator "onnx.Mul"(%14017, %14016) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14019 = torch.operator "onnx.Cast"(%14018) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14020 = torch.operator "onnx.Mul"(%14019, %579) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14021 = torch.operator "onnx.Shape"(%14007) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14022 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14023 = torch.operator "onnx.Gather"(%14021, %14022) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14024 = torch.operator "onnx.Shape"(%14007) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14025 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14026 = torch.operator "onnx.Gather"(%14024, %14025) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14027 = torch.operator "onnx.Shape"(%14007) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14028 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14029 = torch.operator "onnx.Gather"(%14027, %14028) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14030 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18572_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14031 = torch.operator "onnx.Unsqueeze"(%14023, %14030) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14032 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18574_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14033 = torch.operator "onnx.Unsqueeze"(%14026, %14032) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14034 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18576_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14035 = torch.operator "onnx.Unsqueeze"(%14029, %14034) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14036 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.20_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14037 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14038 = torch.operator "onnx.Concat"(%14031, %14033, %14035, %14036, %14037) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14039 = torch.operator "onnx.Reshape"(%14007, %14038) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14040 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14041:2 = torch.operator "onnx.Split"(%14039, %14040) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14042 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14043 = torch.operator "onnx.Squeeze"(%14041#0, %14042) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14044 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14045 = torch.operator "onnx.Squeeze"(%14041#1, %14044) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14046 = torch.operator "onnx.Neg"(%14045) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14047 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14048 = torch.operator "onnx.Unsqueeze"(%14046, %14047) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14049 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14050 = torch.operator "onnx.Unsqueeze"(%14043, %14049) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14051 = torch.operator "onnx.Concat"(%14048, %14050) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14052 = torch.operator "onnx.Shape"(%14051) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %14053 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14054 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14055 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14056 = torch.operator "onnx.Slice"(%14052, %14054, %14055, %14053) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14057 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14058 = torch.operator "onnx.Concat"(%14056, %14057) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14059 = torch.operator "onnx.Reshape"(%14051, %14058) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14060 = torch.operator "onnx.Cast"(%14007) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %14061 = torch.operator "onnx.Mul"(%14060, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14062 = torch.operator "onnx.Cast"(%14059) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14063 = torch.operator "onnx.Mul"(%14062, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14064 = torch.operator "onnx.Add"(%14061, %14063) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14065 = torch.operator "onnx.Cast"(%14064) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %14066 = torch.operator "onnx.Shape"(%14020) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14067 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14068 = torch.operator "onnx.Gather"(%14066, %14067) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14069 = torch.operator "onnx.Shape"(%14020) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14070 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14071 = torch.operator 
"onnx.Gather"(%14069, %14070) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14072 = torch.operator "onnx.Shape"(%14020) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14073 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14074 = torch.operator "onnx.Gather"(%14072, %14073) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14075 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18617_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14076 = torch.operator "onnx.Unsqueeze"(%14068, %14075) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14077 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18619_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14078 = torch.operator "onnx.Unsqueeze"(%14071, %14077) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14079 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18621_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14080 = torch.operator "onnx.Unsqueeze"(%14074, %14079) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14081 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14082 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14083 = torch.operator "onnx.Concat"(%14076, %14078, %14080, %14081, %14082) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14084 = torch.operator "onnx.Reshape"(%14020, %14083) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14085 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14086:2 = torch.operator "onnx.Split"(%14084, %14085) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14087 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14088 = torch.operator "onnx.Squeeze"(%14086#0, %14087) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14089 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14090 = torch.operator "onnx.Squeeze"(%14086#1, %14089) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14091 = torch.operator "onnx.Neg"(%14090) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14092 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14093 = torch.operator "onnx.Unsqueeze"(%14091, %14092) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14094 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.20_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14095 = torch.operator "onnx.Unsqueeze"(%14088, %14094) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14096 = torch.operator "onnx.Concat"(%14093, %14095) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14097 = torch.operator "onnx.Shape"(%14096) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %14098 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14099 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14100 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14101 = torch.operator "onnx.Slice"(%14097, %14099, %14100, %14098) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14102 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14103 = torch.operator "onnx.Concat"(%14101, %14102) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14104 = torch.operator "onnx.Reshape"(%14096, %14103) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14105 = torch.operator "onnx.Cast"(%14020) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %14106 = torch.operator "onnx.Mul"(%14105, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14107 = torch.operator "onnx.Cast"(%14104) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14108 = torch.operator "onnx.Mul"(%14107, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14109 = torch.operator "onnx.Add"(%14106, %14108) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14110 = torch.operator "onnx.Cast"(%14109) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %14111 = torch.operator "onnx.Shape"(%14065) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %14112 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14113 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14114 = torch.operator "onnx.Slice"(%14111, %14112, %14113) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14115 = torch.operator "onnx.Cast"(%14114) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %14116 = torch.operator "onnx.Sqrt"(%14115) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14117 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %14118 = torch.operator "onnx.Cast"(%14116) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %14119 = torch.operator "onnx.Div"(%14117, %14118) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %14120 = torch.operator "onnx.Cast"(%14119) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %14121 = torch.operator "onnx.Transpose"(%14110) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %14122 = torch.operator "onnx.Sqrt"(%14120) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14123 = torch.operator "onnx.Mul"(%14065, %14122) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %14124 = torch.operator "onnx.Sqrt"(%14120) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14125 = torch.operator "onnx.Mul"(%14121, %14124) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %14126 = torch.operator "onnx.MatMul"(%14123, %14125) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %14127 = torch.operator "onnx.Softmax"(%14126) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %14128 = torch.operator "onnx.MatMul"(%14127, %13994) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %14129 = torch.operator "onnx.Transpose"(%14128) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %14130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14131 = torch.operator "onnx.Mul"(%13967, %14130) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %14132 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18674_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14133 = torch.operator "onnx.Unsqueeze"(%13954, %14132) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14134 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14135 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18677_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14136 = torch.operator "onnx.Unsqueeze"(%14131, %14135) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14137 = torch.operator "onnx.Concat"(%14133, %14134, %14136) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14138 = torch.operator "onnx.Reshape"(%14129, %14137) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %14139 = torch.operator "onnx.Cast"(%14138) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %14140 = torch.operator "onnx.Concat"(%14139, %13951) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %14141 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.20_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14142 = torch.operator "onnx.Unsqueeze"(%13925, %14141) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14143 = torch.operator "onnx.MatMul"(%14140, %1073) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %14144 = torch.operator "onnx.Add"(%577, %14143) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14145 = torch.operator "onnx.Mul"(%14142, %14144) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14146 = torch.operator "onnx.Add"(%13907, %14145) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14147 = torch.operator "onnx.Gemm"(%1285, %583, %584) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %14148 = torch.operator "onnx.Shape"(%14147) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %14149 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14150 = torch.operator "onnx.Gather"(%14148, %14149) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14151 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14152 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14153 = torch.operator "onnx.Add"(%14150, %14152) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14154 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14155 = torch.operator 
"onnx.Div"(%14153, %14154) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14156 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14157 = torch.operator "onnx.Mul"(%14155, %14156) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14158 = torch.operator "onnx.Slice"(%14147, %14151, %14157, %14149) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %14159 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14160 = torch.operator "onnx.Mul"(%14155, %14159) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14161 = torch.operator "onnx.Slice"(%14147, %14157, %14160, %14149) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %14162 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14163 = torch.operator "onnx.Mul"(%14155, %14162) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14164 = torch.operator "onnx.Slice"(%14147, %14160, %14163, %14149) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.212Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.21/norm/norm/Constant_attr__value" : tensor<3072xbf16> %14165 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.212Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.212Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.21/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %14166 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.212Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %14167 = torch.operator "onnx.LayerNormalization"(%14146, %14165, %14166) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14168 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14169 = torch.operator "onnx.Unsqueeze"(%14161, %14168) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14170 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14171 = torch.operator "onnx.Add"(%14169, %14170) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %14172 = torch.operator "onnx.Mul"(%14167, %14171) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14173 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14174 = torch.operator "onnx.Unsqueeze"(%14158, %14173) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14175 = torch.operator "onnx.Add"(%14172, %14174) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14176 = torch.operator 
"onnx.MatMul"(%14175, %1074) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14177 = torch.operator "onnx.Add"(%585, %14176) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14178 = torch.operator "onnx.Mul"(%14177, %14177) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14179 = torch.operator "onnx.Mul"(%14177, %14178) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14180 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14181 = torch.operator "onnx.Mul"(%14180, %14179) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14182 = torch.operator "onnx.Add"(%14177, %14181) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14183 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14184 = torch.operator "onnx.Mul"(%14183, %14182) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14185 = torch.operator "onnx.Tanh"(%14184) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14186 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14187 = torch.operator "onnx.Add"(%14186, %14185) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14188 = torch.operator "onnx.Mul"(%14177, %14187) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14189 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14190 = torch.operator "onnx.Mul"(%14189, %14188) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14191 = torch.operator "onnx.Shape"(%14175) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %14192 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14193 = torch.operator "onnx.Gather"(%14191, %14192) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14194 = torch.operator "onnx.MatMul"(%14175, %1075) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14195 = torch.operator "onnx.Add"(%589, %14194) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14196 = torch.operator "onnx.MatMul"(%14175, %1076) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14197 = torch.operator "onnx.Add"(%590, %14196) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14198 = torch.operator "onnx.MatMul"(%14175, %1077) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14199 = torch.operator "onnx.Add"(%591, %14198) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14200 = torch.operator "onnx.Shape"(%14197) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %14201 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14202 = torch.operator "onnx.Gather"(%14200, %14201) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14203 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14204 = torch.operator "onnx.Div"(%14202, %14203) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14205 = torch.operator "onnx.Cast"(%14204) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14206 = torch.operator "onnx.Cast"(%14205) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14207 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18749_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14208 = torch.operator "onnx.Unsqueeze"(%14193, %14207) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14209 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14210 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14211 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18753_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14212 = torch.operator "onnx.Unsqueeze"(%14206, %14211) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14213 = torch.operator "onnx.Concat"(%14208, %14209, %14210, %14212) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14214 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18756_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14215 = torch.operator "onnx.Unsqueeze"(%14193, %14214) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14216 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14218 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18760_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14219 = torch.operator "onnx.Unsqueeze"(%14206, %14218) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14220 = torch.operator "onnx.Concat"(%14215, %14216, %14217, %14219) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14221 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18763_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14222 = torch.operator "onnx.Unsqueeze"(%14193, %14221) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14223 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14225 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18767_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14226 = torch.operator "onnx.Unsqueeze"(%14206, %14225) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14227 = torch.operator "onnx.Concat"(%14222, %14223, %14224, %14226) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14228 = torch.operator "onnx.Reshape"(%14195, %14213) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14229 = torch.operator "onnx.Transpose"(%14228) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14230 = torch.operator "onnx.Reshape"(%14197, %14220) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14231 = torch.operator "onnx.Transpose"(%14230) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14232 = torch.operator "onnx.Reshape"(%14199, %14227) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14233 = torch.operator "onnx.Transpose"(%14232) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14234 = torch.operator "onnx.Cast"(%14229) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14235 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14236 = torch.operator "onnx.Pow"(%14234, %14235) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14237 = torch.operator "onnx.ReduceMean"(%14236) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14238 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14239 = torch.operator "onnx.Add"(%14237, %14238) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14240 = torch.operator "onnx.Sqrt"(%14239) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14241 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14242 = torch.operator "onnx.Div"(%14241, %14240) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14243 = torch.operator "onnx.Cast"(%14229) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14244 = torch.operator "onnx.Mul"(%14243, %14242) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14245 = torch.operator "onnx.Cast"(%14244) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14246 = torch.operator "onnx.Mul"(%14245, %587) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14247 = torch.operator "onnx.Cast"(%14231) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14248 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14249 = torch.operator "onnx.Pow"(%14247, %14248) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14250 = torch.operator "onnx.ReduceMean"(%14249) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14251 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14252 = torch.operator "onnx.Add"(%14250, %14251) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14253 = torch.operator "onnx.Sqrt"(%14252) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14254 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14255 = torch.operator "onnx.Div"(%14254, %14253) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14256 = torch.operator "onnx.Cast"(%14231) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14257 = torch.operator "onnx.Mul"(%14256, %14255) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14258 = torch.operator "onnx.Cast"(%14257) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14259 = torch.operator "onnx.Mul"(%14258, %588) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14260 = torch.operator "onnx.Shape"(%14246) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14261 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14262 = torch.operator "onnx.Gather"(%14260, %14261) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14263 = torch.operator "onnx.Shape"(%14246) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14265 = torch.operator "onnx.Gather"(%14263, %14264) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14266 = torch.operator "onnx.Shape"(%14246) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14267 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14268 = torch.operator "onnx.Gather"(%14266, %14267) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14269 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18811_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14270 = torch.operator "onnx.Unsqueeze"(%14262, %14269) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14271 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18813_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14272 = torch.operator "onnx.Unsqueeze"(%14265, %14271) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14273 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18815_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14274 = torch.operator "onnx.Unsqueeze"(%14268, %14273) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14275 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.21_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14276 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14277 = torch.operator "onnx.Concat"(%14270, %14272, %14274, %14275, %14276) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14278 = torch.operator "onnx.Reshape"(%14246, %14277) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14279 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14280:2 = torch.operator "onnx.Split"(%14278, %14279) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14281 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14282 = torch.operator "onnx.Squeeze"(%14280#0, %14281) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14283 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14284 = torch.operator "onnx.Squeeze"(%14280#1, %14283) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14285 = torch.operator "onnx.Neg"(%14284) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14286 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14287 = torch.operator "onnx.Unsqueeze"(%14285, %14286) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14288 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14289 = torch.operator "onnx.Unsqueeze"(%14282, %14288) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14290 = torch.operator "onnx.Concat"(%14287, %14289) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14291 = torch.operator "onnx.Shape"(%14290) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %14292 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14293 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14294 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14295 = torch.operator "onnx.Slice"(%14291, %14293, %14294, %14292) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14296 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14297 = torch.operator "onnx.Concat"(%14295, %14296) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14298 = torch.operator "onnx.Reshape"(%14290, %14297) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14299 = torch.operator "onnx.Cast"(%14246) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %14300 = torch.operator "onnx.Mul"(%14299, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14301 = torch.operator "onnx.Cast"(%14298) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14302 = torch.operator "onnx.Mul"(%14301, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14303 = torch.operator "onnx.Add"(%14300, %14302) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14304 = torch.operator "onnx.Cast"(%14303) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %14305 = torch.operator "onnx.Shape"(%14259) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14306 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14307 = torch.operator "onnx.Gather"(%14305, %14306) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14308 = torch.operator "onnx.Shape"(%14259) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14309 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14310 = torch.operator 
"onnx.Gather"(%14308, %14309) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14311 = torch.operator "onnx.Shape"(%14259) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14312 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14313 = torch.operator "onnx.Gather"(%14311, %14312) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14314 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18856_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14315 = torch.operator "onnx.Unsqueeze"(%14307, %14314) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14316 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18858_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14317 = torch.operator "onnx.Unsqueeze"(%14310, %14316) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14318 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18860_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14319 = torch.operator "onnx.Unsqueeze"(%14313, %14318) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14320 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14322 = torch.operator "onnx.Concat"(%14315, %14317, %14319, %14320, %14321) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14323 = torch.operator "onnx.Reshape"(%14259, %14322) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14324 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14325:2 = torch.operator "onnx.Split"(%14323, %14324) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14326 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14327 = torch.operator "onnx.Squeeze"(%14325#0, %14326) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14328 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14329 = torch.operator "onnx.Squeeze"(%14325#1, %14328) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14330 = torch.operator "onnx.Neg"(%14329) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14331 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14332 = torch.operator "onnx.Unsqueeze"(%14330, %14331) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14333 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.21_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14334 = torch.operator "onnx.Unsqueeze"(%14327, %14333) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14335 = torch.operator "onnx.Concat"(%14332, %14334) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14336 = torch.operator "onnx.Shape"(%14335) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %14337 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14338 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14339 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14340 = torch.operator "onnx.Slice"(%14336, %14338, %14339, %14337) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14341 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14342 = torch.operator "onnx.Concat"(%14340, %14341) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14343 = torch.operator "onnx.Reshape"(%14335, %14342) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14344 = torch.operator "onnx.Cast"(%14259) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %14345 = torch.operator "onnx.Mul"(%14344, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14346 = torch.operator "onnx.Cast"(%14343) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14347 = torch.operator "onnx.Mul"(%14346, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14348 = torch.operator "onnx.Add"(%14345, %14347) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14349 = torch.operator "onnx.Cast"(%14348) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %14350 = torch.operator "onnx.Shape"(%14304) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %14351 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14352 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14353 = torch.operator "onnx.Slice"(%14350, %14351, %14352) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14354 = torch.operator "onnx.Cast"(%14353) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %14355 = torch.operator "onnx.Sqrt"(%14354) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14356 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %14357 = torch.operator "onnx.Cast"(%14355) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %14358 = torch.operator "onnx.Div"(%14356, %14357) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %14359 = torch.operator "onnx.Cast"(%14358) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %14360 = torch.operator "onnx.Transpose"(%14349) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %14361 = torch.operator "onnx.Sqrt"(%14359) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14362 = torch.operator "onnx.Mul"(%14304, %14361) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %14363 = torch.operator "onnx.Sqrt"(%14359) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14364 = torch.operator "onnx.Mul"(%14360, %14363) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %14365 = torch.operator "onnx.MatMul"(%14362, %14364) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %14366 = torch.operator "onnx.Softmax"(%14365) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %14367 = torch.operator "onnx.MatMul"(%14366, %14233) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %14368 = torch.operator "onnx.Transpose"(%14367) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %14369 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14370 = torch.operator "onnx.Mul"(%14206, %14369) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %14371 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18913_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14372 = torch.operator "onnx.Unsqueeze"(%14193, %14371) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14373 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14374 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18916_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14375 = torch.operator "onnx.Unsqueeze"(%14370, %14374) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14376 = torch.operator "onnx.Concat"(%14372, %14373, %14375) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14377 = torch.operator "onnx.Reshape"(%14368, %14376) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %14378 = torch.operator "onnx.Cast"(%14377) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %14379 = torch.operator "onnx.Concat"(%14378, %14190) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %14380 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.21_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14381 = torch.operator "onnx.Unsqueeze"(%14164, %14380) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14382 = torch.operator "onnx.MatMul"(%14379, %1078) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %14383 = torch.operator "onnx.Add"(%586, %14382) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14384 = torch.operator "onnx.Mul"(%14381, %14383) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14385 = torch.operator "onnx.Add"(%14146, %14384) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14386 = torch.operator "onnx.Gemm"(%1285, %592, %593) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %14387 = torch.operator "onnx.Shape"(%14386) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %14388 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14389 = torch.operator "onnx.Gather"(%14387, %14388) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14390 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14391 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14392 = torch.operator "onnx.Add"(%14389, %14391) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14393 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14394 = torch.operator 
"onnx.Div"(%14392, %14393) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14395 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14396 = torch.operator "onnx.Mul"(%14394, %14395) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14397 = torch.operator "onnx.Slice"(%14386, %14390, %14396, %14388) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %14398 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14399 = torch.operator "onnx.Mul"(%14394, %14398) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14400 = torch.operator "onnx.Slice"(%14386, %14396, %14399, %14388) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %14401 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14402 = torch.operator "onnx.Mul"(%14394, %14401) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14403 = torch.operator "onnx.Slice"(%14386, %14399, %14402, %14388) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.222Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.22/norm/norm/Constant_attr__value" : tensor<3072xbf16> %14404 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.222Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.222Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.22/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %14405 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.222Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %14406 = torch.operator "onnx.LayerNormalization"(%14385, %14404, %14405) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14407 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14408 = torch.operator "onnx.Unsqueeze"(%14400, %14407) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14410 = torch.operator "onnx.Add"(%14408, %14409) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %14411 = torch.operator "onnx.Mul"(%14406, %14410) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14412 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14413 = torch.operator "onnx.Unsqueeze"(%14397, %14412) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14414 = torch.operator "onnx.Add"(%14411, %14413) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14415 = torch.operator 
"onnx.MatMul"(%14414, %1079) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14416 = torch.operator "onnx.Add"(%594, %14415) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14417 = torch.operator "onnx.Mul"(%14416, %14416) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14418 = torch.operator "onnx.Mul"(%14416, %14417) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14420 = torch.operator "onnx.Mul"(%14419, %14418) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14421 = torch.operator "onnx.Add"(%14416, %14420) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14422 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14423 = torch.operator "onnx.Mul"(%14422, %14421) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14424 = torch.operator "onnx.Tanh"(%14423) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14425 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14426 = torch.operator "onnx.Add"(%14425, %14424) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14427 = torch.operator "onnx.Mul"(%14416, %14426) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14428 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14429 = torch.operator "onnx.Mul"(%14428, %14427) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14430 = torch.operator "onnx.Shape"(%14414) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %14431 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14432 = torch.operator "onnx.Gather"(%14430, %14431) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14433 = torch.operator "onnx.MatMul"(%14414, %1080) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14434 = torch.operator "onnx.Add"(%598, %14433) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14435 = torch.operator "onnx.MatMul"(%14414, %1081) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14436 = torch.operator "onnx.Add"(%599, %14435) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14437 = torch.operator "onnx.MatMul"(%14414, %1082) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14438 = torch.operator "onnx.Add"(%600, %14437) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14439 = torch.operator "onnx.Shape"(%14436) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %14440 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14441 = torch.operator "onnx.Gather"(%14439, %14440) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14442 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14443 = torch.operator "onnx.Div"(%14441, %14442) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14444 = torch.operator "onnx.Cast"(%14443) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14445 = torch.operator "onnx.Cast"(%14444) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14446 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18988_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14447 = torch.operator "onnx.Unsqueeze"(%14432, %14446) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14448 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14449 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14450 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18992_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14451 = torch.operator "onnx.Unsqueeze"(%14445, %14450) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14452 = torch.operator "onnx.Concat"(%14447, %14448, %14449, %14451) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14453 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18995_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14454 = torch.operator "onnx.Unsqueeze"(%14432, %14453) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14455 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14456 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14457 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_18999_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14458 = torch.operator "onnx.Unsqueeze"(%14445, %14457) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14459 = torch.operator "onnx.Concat"(%14454, %14455, %14456, %14458) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14460 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19002_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14461 = torch.operator "onnx.Unsqueeze"(%14432, %14460) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14462 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14463 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14464 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19006_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14465 = torch.operator "onnx.Unsqueeze"(%14445, %14464) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14466 = torch.operator "onnx.Concat"(%14461, %14462, %14463, %14465) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14467 = torch.operator "onnx.Reshape"(%14434, %14452) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14468 = torch.operator "onnx.Transpose"(%14467) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14469 = torch.operator "onnx.Reshape"(%14436, %14459) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14470 = torch.operator "onnx.Transpose"(%14469) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14471 = torch.operator "onnx.Reshape"(%14438, %14466) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14472 = torch.operator "onnx.Transpose"(%14471) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14473 = torch.operator "onnx.Cast"(%14468) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14474 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14475 = torch.operator "onnx.Pow"(%14473, %14474) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14476 = torch.operator "onnx.ReduceMean"(%14475) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14477 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14478 = torch.operator "onnx.Add"(%14476, %14477) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14479 = torch.operator "onnx.Sqrt"(%14478) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14480 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14481 = torch.operator "onnx.Div"(%14480, %14479) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14482 = torch.operator "onnx.Cast"(%14468) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14483 = torch.operator "onnx.Mul"(%14482, %14481) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14484 = torch.operator "onnx.Cast"(%14483) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14485 = torch.operator "onnx.Mul"(%14484, %596) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14486 = torch.operator "onnx.Cast"(%14470) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14488 = torch.operator "onnx.Pow"(%14486, %14487) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14489 = torch.operator "onnx.ReduceMean"(%14488) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14490 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14491 = torch.operator "onnx.Add"(%14489, %14490) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14492 = torch.operator "onnx.Sqrt"(%14491) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14493 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14494 = torch.operator "onnx.Div"(%14493, %14492) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14495 = torch.operator "onnx.Cast"(%14470) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14496 = torch.operator "onnx.Mul"(%14495, %14494) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14497 = torch.operator "onnx.Cast"(%14496) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14498 = torch.operator "onnx.Mul"(%14497, %597) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14499 = torch.operator "onnx.Shape"(%14485) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14501 = torch.operator "onnx.Gather"(%14499, %14500) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14502 = torch.operator "onnx.Shape"(%14485) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14503 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14504 = torch.operator "onnx.Gather"(%14502, %14503) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14505 = torch.operator "onnx.Shape"(%14485) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14506 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14507 = torch.operator "onnx.Gather"(%14505, %14506) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14508 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19050_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14509 = torch.operator "onnx.Unsqueeze"(%14501, %14508) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14510 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19052_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14511 = torch.operator "onnx.Unsqueeze"(%14504, %14510) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14512 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19054_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14513 = torch.operator "onnx.Unsqueeze"(%14507, %14512) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14514 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.22_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14515 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14516 = torch.operator "onnx.Concat"(%14509, %14511, %14513, %14514, %14515) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14517 = torch.operator "onnx.Reshape"(%14485, %14516) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14518 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14519:2 = torch.operator "onnx.Split"(%14517, %14518) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14520 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14521 = torch.operator "onnx.Squeeze"(%14519#0, %14520) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14522 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14523 = torch.operator "onnx.Squeeze"(%14519#1, %14522) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14524 = torch.operator "onnx.Neg"(%14523) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14525 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14526 = torch.operator "onnx.Unsqueeze"(%14524, %14525) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14527 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14528 = torch.operator "onnx.Unsqueeze"(%14521, %14527) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14529 = torch.operator "onnx.Concat"(%14526, %14528) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14530 = torch.operator "onnx.Shape"(%14529) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %14531 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14533 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14534 = torch.operator "onnx.Slice"(%14530, %14532, %14533, %14531) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14535 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14536 = torch.operator "onnx.Concat"(%14534, %14535) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14537 = torch.operator "onnx.Reshape"(%14529, %14536) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14538 = torch.operator "onnx.Cast"(%14485) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %14539 = torch.operator "onnx.Mul"(%14538, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14540 = torch.operator "onnx.Cast"(%14537) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14541 = torch.operator "onnx.Mul"(%14540, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14542 = torch.operator "onnx.Add"(%14539, %14541) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14543 = torch.operator "onnx.Cast"(%14542) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %14544 = torch.operator "onnx.Shape"(%14498) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14545 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14546 = torch.operator "onnx.Gather"(%14544, %14545) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14547 = torch.operator "onnx.Shape"(%14498) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14548 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14549 = torch.operator 
"onnx.Gather"(%14547, %14548) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14550 = torch.operator "onnx.Shape"(%14498) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14551 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14552 = torch.operator "onnx.Gather"(%14550, %14551) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14553 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19095_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14554 = torch.operator "onnx.Unsqueeze"(%14546, %14553) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14555 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19097_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14556 = torch.operator "onnx.Unsqueeze"(%14549, %14555) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14557 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19099_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14558 = torch.operator "onnx.Unsqueeze"(%14552, %14557) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14559 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14560 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14561 = torch.operator "onnx.Concat"(%14554, %14556, %14558, %14559, %14560) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14562 = torch.operator "onnx.Reshape"(%14498, %14561) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14563 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14564:2 = torch.operator "onnx.Split"(%14562, %14563) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14565 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14566 = torch.operator "onnx.Squeeze"(%14564#0, %14565) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14567 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14568 = torch.operator "onnx.Squeeze"(%14564#1, %14567) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14569 = torch.operator "onnx.Neg"(%14568) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14570 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14571 = torch.operator "onnx.Unsqueeze"(%14569, %14570) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14572 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.22_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14573 = torch.operator "onnx.Unsqueeze"(%14566, %14572) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14574 = torch.operator "onnx.Concat"(%14571, %14573) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14575 = torch.operator "onnx.Shape"(%14574) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %14576 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14577 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14578 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14579 = torch.operator "onnx.Slice"(%14575, %14577, %14578, %14576) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14580 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14581 = torch.operator "onnx.Concat"(%14579, %14580) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14582 = torch.operator "onnx.Reshape"(%14574, %14581) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14583 = torch.operator "onnx.Cast"(%14498) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %14584 = torch.operator "onnx.Mul"(%14583, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14585 = torch.operator "onnx.Cast"(%14582) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14586 = torch.operator "onnx.Mul"(%14585, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14587 = torch.operator "onnx.Add"(%14584, %14586) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14588 = torch.operator "onnx.Cast"(%14587) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %14589 = torch.operator "onnx.Shape"(%14543) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %14590 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14591 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14592 = torch.operator "onnx.Slice"(%14589, %14590, %14591) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14593 = torch.operator "onnx.Cast"(%14592) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %14594 = torch.operator "onnx.Sqrt"(%14593) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14595 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %14596 = torch.operator "onnx.Cast"(%14594) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %14597 = torch.operator "onnx.Div"(%14595, %14596) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %14598 = torch.operator "onnx.Cast"(%14597) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %14599 = torch.operator "onnx.Transpose"(%14588) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %14600 = torch.operator "onnx.Sqrt"(%14598) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14601 = torch.operator "onnx.Mul"(%14543, %14600) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %14602 = torch.operator "onnx.Sqrt"(%14598) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14603 = torch.operator "onnx.Mul"(%14599, %14602) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %14604 = torch.operator "onnx.MatMul"(%14601, %14603) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %14605 = torch.operator "onnx.Softmax"(%14604) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %14606 = torch.operator "onnx.MatMul"(%14605, %14472) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %14607 = torch.operator "onnx.Transpose"(%14606) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %14608 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14609 = torch.operator "onnx.Mul"(%14445, %14608) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %14610 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19152_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14611 = torch.operator "onnx.Unsqueeze"(%14432, %14610) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14612 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19155_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14614 = torch.operator "onnx.Unsqueeze"(%14609, %14613) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14615 = torch.operator "onnx.Concat"(%14611, %14612, %14614) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14616 = torch.operator "onnx.Reshape"(%14607, %14615) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %14617 = torch.operator "onnx.Cast"(%14616) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %14618 = torch.operator "onnx.Concat"(%14617, %14429) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %14619 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.22_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14620 = torch.operator "onnx.Unsqueeze"(%14403, %14619) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14621 = torch.operator "onnx.MatMul"(%14618, %1083) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %14622 = torch.operator "onnx.Add"(%595, %14621) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14623 = torch.operator "onnx.Mul"(%14620, %14622) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14624 = torch.operator "onnx.Add"(%14385, %14623) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14625 = torch.operator "onnx.Gemm"(%1285, %601, %602) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %14626 = torch.operator "onnx.Shape"(%14625) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %14627 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14628 = torch.operator "onnx.Gather"(%14626, %14627) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14629 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14630 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14631 = torch.operator "onnx.Add"(%14628, %14630) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14632 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14633 = torch.operator 
"onnx.Div"(%14631, %14632) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14634 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14635 = torch.operator "onnx.Mul"(%14633, %14634) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14636 = torch.operator "onnx.Slice"(%14625, %14629, %14635, %14627) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %14637 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14638 = torch.operator "onnx.Mul"(%14633, %14637) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14639 = torch.operator "onnx.Slice"(%14625, %14635, %14638, %14627) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %14640 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14641 = torch.operator "onnx.Mul"(%14633, %14640) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14642 = torch.operator "onnx.Slice"(%14625, %14638, %14641, %14627) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.232Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.23/norm/norm/Constant_attr__value" : tensor<3072xbf16> %14643 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.232Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.232Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.23/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %14644 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.232Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %14645 = torch.operator "onnx.LayerNormalization"(%14624, %14643, %14644) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14646 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14647 = torch.operator "onnx.Unsqueeze"(%14639, %14646) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14648 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14649 = torch.operator "onnx.Add"(%14647, %14648) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %14650 = torch.operator "onnx.Mul"(%14645, %14649) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14651 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14652 = torch.operator "onnx.Unsqueeze"(%14636, %14651) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14653 = torch.operator "onnx.Add"(%14650, %14652) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14654 = torch.operator 
"onnx.MatMul"(%14653, %1084) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14655 = torch.operator "onnx.Add"(%603, %14654) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14656 = torch.operator "onnx.Mul"(%14655, %14655) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14657 = torch.operator "onnx.Mul"(%14655, %14656) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14658 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14659 = torch.operator "onnx.Mul"(%14658, %14657) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14660 = torch.operator "onnx.Add"(%14655, %14659) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14661 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14662 = torch.operator "onnx.Mul"(%14661, %14660) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14663 = torch.operator "onnx.Tanh"(%14662) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14664 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14665 = torch.operator "onnx.Add"(%14664, %14663) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14666 = torch.operator "onnx.Mul"(%14655, %14665) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14667 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14668 = torch.operator "onnx.Mul"(%14667, %14666) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14669 = torch.operator "onnx.Shape"(%14653) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %14670 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14671 = torch.operator "onnx.Gather"(%14669, %14670) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14672 = torch.operator "onnx.MatMul"(%14653, %1085) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14673 = torch.operator "onnx.Add"(%607, %14672) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14674 = torch.operator "onnx.MatMul"(%14653, %1086) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14675 = torch.operator "onnx.Add"(%608, %14674) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14676 = torch.operator "onnx.MatMul"(%14653, %1087) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14677 = torch.operator "onnx.Add"(%609, %14676) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14678 = torch.operator "onnx.Shape"(%14675) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %14679 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14680 = torch.operator "onnx.Gather"(%14678, %14679) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14681 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14682 = torch.operator "onnx.Div"(%14680, %14681) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14683 = torch.operator "onnx.Cast"(%14682) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14684 = torch.operator "onnx.Cast"(%14683) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14685 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19227_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14686 = torch.operator "onnx.Unsqueeze"(%14671, %14685) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14687 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14688 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14689 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19231_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14690 = torch.operator "onnx.Unsqueeze"(%14684, %14689) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14691 = torch.operator "onnx.Concat"(%14686, %14687, %14688, %14690) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14692 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19234_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14693 = torch.operator "onnx.Unsqueeze"(%14671, %14692) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14694 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14695 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14696 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19238_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14697 = torch.operator "onnx.Unsqueeze"(%14684, %14696) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14698 = torch.operator "onnx.Concat"(%14693, %14694, %14695, %14697) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14699 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19241_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14700 = torch.operator "onnx.Unsqueeze"(%14671, %14699) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14701 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14703 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19245_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14704 = torch.operator "onnx.Unsqueeze"(%14684, %14703) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14705 = torch.operator "onnx.Concat"(%14700, %14701, %14702, %14704) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14706 = torch.operator "onnx.Reshape"(%14673, %14691) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14707 = torch.operator "onnx.Transpose"(%14706) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14708 = torch.operator "onnx.Reshape"(%14675, %14698) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14709 = torch.operator "onnx.Transpose"(%14708) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14710 = torch.operator "onnx.Reshape"(%14677, %14705) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14711 = torch.operator "onnx.Transpose"(%14710) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14712 = torch.operator "onnx.Cast"(%14707) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14713 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14714 = torch.operator "onnx.Pow"(%14712, %14713) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14715 = torch.operator "onnx.ReduceMean"(%14714) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14716 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14717 = torch.operator "onnx.Add"(%14715, %14716) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14718 = torch.operator "onnx.Sqrt"(%14717) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14719 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14720 = torch.operator "onnx.Div"(%14719, %14718) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14721 = torch.operator "onnx.Cast"(%14707) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14722 = torch.operator "onnx.Mul"(%14721, %14720) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14723 = torch.operator "onnx.Cast"(%14722) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14724 = torch.operator "onnx.Mul"(%14723, %605) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14725 = torch.operator "onnx.Cast"(%14709) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14726 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14727 = torch.operator "onnx.Pow"(%14725, %14726) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14728 = torch.operator "onnx.ReduceMean"(%14727) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14729 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14730 = torch.operator "onnx.Add"(%14728, %14729) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14731 = torch.operator "onnx.Sqrt"(%14730) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14732 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14733 = torch.operator "onnx.Div"(%14732, %14731) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14734 = torch.operator "onnx.Cast"(%14709) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14735 = torch.operator "onnx.Mul"(%14734, %14733) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14736 = torch.operator "onnx.Cast"(%14735) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14737 = torch.operator "onnx.Mul"(%14736, %606) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14738 = torch.operator "onnx.Shape"(%14724) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14739 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14740 = torch.operator "onnx.Gather"(%14738, %14739) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14741 = torch.operator "onnx.Shape"(%14724) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14742 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14743 = torch.operator "onnx.Gather"(%14741, %14742) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14744 = torch.operator "onnx.Shape"(%14724) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14745 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14746 = torch.operator "onnx.Gather"(%14744, %14745) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14747 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19289_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14748 = torch.operator "onnx.Unsqueeze"(%14740, %14747) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14749 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19291_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14750 = torch.operator "onnx.Unsqueeze"(%14743, %14749) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14751 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19293_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14752 = torch.operator "onnx.Unsqueeze"(%14746, %14751) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14753 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.23_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14754 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14755 = torch.operator "onnx.Concat"(%14748, %14750, %14752, %14753, %14754) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14756 = torch.operator "onnx.Reshape"(%14724, %14755) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14757 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14758:2 = torch.operator "onnx.Split"(%14756, %14757) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14759 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14760 = torch.operator "onnx.Squeeze"(%14758#0, %14759) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14761 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14762 = torch.operator "onnx.Squeeze"(%14758#1, %14761) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14763 = torch.operator "onnx.Neg"(%14762) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14764 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14765 = torch.operator "onnx.Unsqueeze"(%14763, %14764) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14766 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14767 = torch.operator "onnx.Unsqueeze"(%14760, %14766) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14768 = torch.operator "onnx.Concat"(%14765, %14767) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14769 = torch.operator "onnx.Shape"(%14768) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %14770 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14771 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14772 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14773 = torch.operator "onnx.Slice"(%14769, %14771, %14772, %14770) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14774 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14775 = torch.operator "onnx.Concat"(%14773, %14774) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14776 = torch.operator "onnx.Reshape"(%14768, %14775) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14777 = torch.operator "onnx.Cast"(%14724) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %14778 = torch.operator "onnx.Mul"(%14777, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14779 = torch.operator "onnx.Cast"(%14776) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14780 = torch.operator "onnx.Mul"(%14779, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14781 = torch.operator "onnx.Add"(%14778, %14780) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14782 = torch.operator "onnx.Cast"(%14781) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %14783 = torch.operator "onnx.Shape"(%14737) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14784 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14785 = torch.operator "onnx.Gather"(%14783, %14784) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14786 = torch.operator "onnx.Shape"(%14737) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14787 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14788 = torch.operator 
"onnx.Gather"(%14786, %14787) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14789 = torch.operator "onnx.Shape"(%14737) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14790 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14791 = torch.operator "onnx.Gather"(%14789, %14790) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14792 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19334_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14793 = torch.operator "onnx.Unsqueeze"(%14785, %14792) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14794 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19336_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14795 = torch.operator "onnx.Unsqueeze"(%14788, %14794) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14796 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19338_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14797 = torch.operator "onnx.Unsqueeze"(%14791, %14796) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14798 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14799 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14800 = torch.operator "onnx.Concat"(%14793, %14795, %14797, %14798, %14799) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14801 = torch.operator "onnx.Reshape"(%14737, %14800) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14802 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14803:2 = torch.operator "onnx.Split"(%14801, %14802) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14804 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14805 = torch.operator "onnx.Squeeze"(%14803#0, %14804) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14806 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14807 = torch.operator "onnx.Squeeze"(%14803#1, %14806) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14808 = torch.operator "onnx.Neg"(%14807) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14809 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14810 = torch.operator "onnx.Unsqueeze"(%14808, %14809) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14811 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.23_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14812 = torch.operator "onnx.Unsqueeze"(%14805, %14811) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %14813 = torch.operator "onnx.Concat"(%14810, %14812) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14814 = torch.operator "onnx.Shape"(%14813) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %14815 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14816 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14817 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14818 = torch.operator "onnx.Slice"(%14814, %14816, %14817, %14815) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14819 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14820 = torch.operator "onnx.Concat"(%14818, %14819) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14821 = torch.operator "onnx.Reshape"(%14813, %14820) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14822 = torch.operator "onnx.Cast"(%14737) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %14823 = torch.operator "onnx.Mul"(%14822, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14824 = torch.operator "onnx.Cast"(%14821) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14825 = torch.operator "onnx.Mul"(%14824, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14826 = torch.operator "onnx.Add"(%14823, %14825) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %14827 = torch.operator "onnx.Cast"(%14826) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %14828 = torch.operator "onnx.Shape"(%14782) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %14829 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14830 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14831 = torch.operator "onnx.Slice"(%14828, %14829, %14830) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14832 = torch.operator "onnx.Cast"(%14831) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %14833 = torch.operator "onnx.Sqrt"(%14832) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14834 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %14835 = torch.operator "onnx.Cast"(%14833) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %14836 = torch.operator "onnx.Div"(%14834, %14835) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %14837 = torch.operator "onnx.Cast"(%14836) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %14838 = torch.operator "onnx.Transpose"(%14827) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %14839 = torch.operator "onnx.Sqrt"(%14837) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14840 = torch.operator "onnx.Mul"(%14782, %14839) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %14841 = torch.operator "onnx.Sqrt"(%14837) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %14842 = torch.operator "onnx.Mul"(%14838, %14841) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %14843 = torch.operator "onnx.MatMul"(%14840, %14842) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %14844 = torch.operator "onnx.Softmax"(%14843) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %14845 = torch.operator "onnx.MatMul"(%14844, %14711) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %14846 = torch.operator "onnx.Transpose"(%14845) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %14847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14848 = torch.operator "onnx.Mul"(%14684, %14847) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %14849 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19391_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14850 = torch.operator "onnx.Unsqueeze"(%14671, %14849) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14851 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14852 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19394_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14853 = torch.operator "onnx.Unsqueeze"(%14848, %14852) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14854 = torch.operator "onnx.Concat"(%14850, %14851, %14853) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %14855 = torch.operator "onnx.Reshape"(%14846, %14854) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %14856 = torch.operator "onnx.Cast"(%14855) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %14857 = torch.operator "onnx.Concat"(%14856, %14668) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %14858 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.23_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14859 = torch.operator "onnx.Unsqueeze"(%14642, %14858) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14860 = torch.operator "onnx.MatMul"(%14857, %1088) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %14861 = torch.operator "onnx.Add"(%604, %14860) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14862 = torch.operator "onnx.Mul"(%14859, %14861) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14863 = torch.operator "onnx.Add"(%14624, %14862) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14864 = torch.operator "onnx.Gemm"(%1285, %610, %611) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %14865 = torch.operator "onnx.Shape"(%14864) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %14866 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14867 = torch.operator "onnx.Gather"(%14865, %14866) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14869 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14870 = torch.operator "onnx.Add"(%14867, %14869) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14871 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14872 = torch.operator 
"onnx.Div"(%14870, %14871) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14873 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14874 = torch.operator "onnx.Mul"(%14872, %14873) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14875 = torch.operator "onnx.Slice"(%14864, %14868, %14874, %14866) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %14876 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14877 = torch.operator "onnx.Mul"(%14872, %14876) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14878 = torch.operator "onnx.Slice"(%14864, %14874, %14877, %14866) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %14879 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14880 = torch.operator "onnx.Mul"(%14872, %14879) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14881 = torch.operator "onnx.Slice"(%14864, %14877, %14880, %14866) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.242Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.24/norm/norm/Constant_attr__value" : tensor<3072xbf16> %14882 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.242Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.242Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.24/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %14883 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.242Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %14884 = torch.operator "onnx.LayerNormalization"(%14863, %14882, %14883) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14885 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14886 = torch.operator "onnx.Unsqueeze"(%14878, %14885) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14887 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14888 = torch.operator "onnx.Add"(%14886, %14887) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %14889 = torch.operator "onnx.Mul"(%14884, %14888) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14890 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14891 = torch.operator "onnx.Unsqueeze"(%14875, %14890) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %14892 = torch.operator "onnx.Add"(%14889, %14891) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14893 = torch.operator 
"onnx.MatMul"(%14892, %1089) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14894 = torch.operator "onnx.Add"(%612, %14893) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14895 = torch.operator "onnx.Mul"(%14894, %14894) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14896 = torch.operator "onnx.Mul"(%14894, %14895) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14897 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14898 = torch.operator "onnx.Mul"(%14897, %14896) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14899 = torch.operator "onnx.Add"(%14894, %14898) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14900 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14901 = torch.operator "onnx.Mul"(%14900, %14899) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14902 = torch.operator "onnx.Tanh"(%14901) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14903 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14904 = torch.operator "onnx.Add"(%14903, %14902) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14905 = torch.operator "onnx.Mul"(%14894, %14904) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14906 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %14907 = torch.operator "onnx.Mul"(%14906, %14905) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %14908 = torch.operator "onnx.Shape"(%14892) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %14909 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14910 = torch.operator "onnx.Gather"(%14908, %14909) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14911 = torch.operator "onnx.MatMul"(%14892, %1090) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14912 = torch.operator "onnx.Add"(%616, %14911) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14913 = torch.operator "onnx.MatMul"(%14892, %1091) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14914 = torch.operator "onnx.Add"(%617, %14913) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14915 = torch.operator "onnx.MatMul"(%14892, %1092) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14916 = torch.operator "onnx.Add"(%618, %14915) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %14917 = torch.operator "onnx.Shape"(%14914) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %14918 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14919 = torch.operator "onnx.Gather"(%14917, %14918) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14920 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14921 = torch.operator "onnx.Div"(%14919, %14920) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14922 = torch.operator "onnx.Cast"(%14921) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14923 = torch.operator "onnx.Cast"(%14922) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14924 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19466_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14925 = torch.operator "onnx.Unsqueeze"(%14910, %14924) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14926 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14927 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14928 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19470_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14929 = torch.operator "onnx.Unsqueeze"(%14923, %14928) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14930 = torch.operator "onnx.Concat"(%14925, %14926, %14927, %14929) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14931 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19473_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14932 = torch.operator "onnx.Unsqueeze"(%14910, %14931) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14933 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14934 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14935 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19477_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14936 = torch.operator "onnx.Unsqueeze"(%14923, %14935) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14937 = torch.operator "onnx.Concat"(%14932, %14933, %14934, %14936) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14938 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19480_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14939 = torch.operator "onnx.Unsqueeze"(%14910, %14938) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14940 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14941 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14942 = 
// Finishes shape vector %14944, then reshapes each [?,4608,3072] tensor to 4-D and swaps axes 1 and 2
// (perm [0,2,1,3]): %14912 -> %14946, %14914 -> %14948, %14916 -> %14950.
// %14946 then enters the chain whose constants are named "...attn_norm_q..." (Cast to f32, Pow, ...), so it is
// presumably the query; %14948 enters the "...attn_norm_k..." chain (presumably the key); %14950 is the
// right-hand operand of the MatMul that follows the Softmax (presumably the value) -- confirm against the model.
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19484_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14943 = torch.operator "onnx.Unsqueeze"(%14923, %14942) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14944 = torch.operator "onnx.Concat"(%14939, %14940, %14941, %14943) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %14945 = torch.operator "onnx.Reshape"(%14912, %14930) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14946 = torch.operator "onnx.Transpose"(%14945) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14947 = torch.operator "onnx.Reshape"(%14914, %14937) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14948 = torch.operator "onnx.Transpose"(%14947) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14949 = torch.operator "onnx.Reshape"(%14916, %14944) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %14950 = torch.operator "onnx.Transpose"(%14949) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %14951 = torch.operator "onnx.Cast"(%14946) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14952 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14953 = torch.operator "onnx.Pow"(%14951, %14952) : 
// norm_q chain on x = %14946, computed in f32:
//   %14959 = c2 / Sqrt(ReduceMean(Pow(x, c0), axis -1, keepdims = 1) + c1);   %14961 = x * %14959
// then Cast back to bf16 (to = 16) and multiply by the [128] bf16 tensor %614 -> %14963.
// This has the form of an RMS normalisation; c0/c1/c2 are opaque dense_resource scalars
// (presumably 2, an epsilon and 1 -- TODO confirm against the resource blob).
// %14960 re-casts %14946 to f32 although %14951 already holds that value (exporter redundancy).
// The trailing ops start the same chain for %14948 using the "...attn_norm_k..." constants.
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14954 = torch.operator "onnx.ReduceMean"(%14953) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14955 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14956 = torch.operator "onnx.Add"(%14954, %14955) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14957 = torch.operator "onnx.Sqrt"(%14956) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14958 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14959 = torch.operator "onnx.Div"(%14958, %14957) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14960 = torch.operator "onnx.Cast"(%14946) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14961 = torch.operator "onnx.Mul"(%14960, %14959) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14962 = torch.operator "onnx.Cast"(%14961) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14963 = torch.operator "onnx.Mul"(%14962, %614) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14964 = torch.operator "onnx.Cast"(%14948) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14965 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14966 = torch.operator "onnx.Pow"(%14964, %14965) : 
// norm_k chain on %14948: the same op sequence as the norm_q chain (ReduceMean over the last axis, Add, Sqrt,
// Div, Mul in f32, Cast to bf16), finally multiplied by the [128] bf16 tensor %615 -> %14976.
// The ops after %14976 begin reading individual entries of Shape(%14963) (Shape + Gather with a scalar
// index constant) for the 5-D Reshape that follows.
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14967 = torch.operator "onnx.ReduceMean"(%14966) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14968 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14969 = torch.operator "onnx.Add"(%14967, %14968) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14970 = torch.operator "onnx.Sqrt"(%14969) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14971 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %14972 = torch.operator "onnx.Div"(%14971, %14970) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %14973 = torch.operator "onnx.Cast"(%14948) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %14974 = torch.operator "onnx.Mul"(%14973, %14972) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %14975 = torch.operator "onnx.Cast"(%14974) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %14976 = torch.operator "onnx.Mul"(%14975, %615) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %14977 = torch.operator "onnx.Shape"(%14963) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14979 = torch.operator "onnx.Gather"(%14977, %14978) {torch.onnx.axis = 0 : si64} : 
// Three Shape/Gather pairs read three entries of Shape(%14963) (index constants are opaque); each scalar
// is unsqueezed to a [1] tensor (%14987, %14989, %14991) so it can be concatenated into a reshape target.
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14980 = torch.operator "onnx.Shape"(%14963) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14981 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14982 = torch.operator "onnx.Gather"(%14980, %14981) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14983 = torch.operator "onnx.Shape"(%14963) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %14984 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %14985 = torch.operator "onnx.Gather"(%14983, %14984) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %14986 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19528_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14987 = torch.operator "onnx.Unsqueeze"(%14979, %14986) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14988 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19530_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14989 = torch.operator "onnx.Unsqueeze"(%14982, %14988) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14990 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19532_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14991 = torch.operator "onnx.Unsqueeze"(%14985, %14990) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %14992 = torch.operator "onnx.Constant"() {torch.onnx.value = 
// %14994 = Concat(%14987, %14989, %14991, const, const) is a 5-element shape; %14995 reshapes %14963 to
// [?,?,?,?,2], so the last axis holds pairs. Split on axis -1 separates the pair members (%14997#0 and
// %14997#1); both are squeezed back to 4-D and the second member is negated (%15002).
dense_resource<__single_transformer_blocks.24_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14993 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14994 = torch.operator "onnx.Concat"(%14987, %14989, %14991, %14992, %14993) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %14995 = torch.operator "onnx.Reshape"(%14963, %14994) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %14996 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %14997:2 = torch.operator "onnx.Split"(%14995, %14996) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %14998 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %14999 = torch.operator "onnx.Squeeze"(%14997#0, %14998) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15000 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15001 = torch.operator "onnx.Squeeze"(%14997#1, %15000) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15002 = torch.operator "onnx.Neg"(%15001) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15003 = 
// Re-pair the two members in swapped order: %15007 = Concat(unsqueeze(%15002), unsqueeze(%14999)) on axis -1,
// so every (a, b) pair of %14963 becomes (-b, a).
// %15012 slices three entries out of Shape(%15007) (Slice operands are data, starts, ends, axes);
// %15014 appends a [1] constant to form the 4-element shape used to collapse %15007 back to 4-D.
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15004 = torch.operator "onnx.Unsqueeze"(%15002, %15003) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15005 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15006 = torch.operator "onnx.Unsqueeze"(%14999, %15005) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15007 = torch.operator "onnx.Concat"(%15004, %15006) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15008 = torch.operator "onnx.Shape"(%15007) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %15009 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15010 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15011 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15012 = torch.operator "onnx.Slice"(%15008, %15010, %15011, %15009) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15013 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15014 = torch.operator "onnx.Concat"(%15012, %15013) 
// %15015 = the pair-rotated tensor %15007 reshaped to 4-D. Combine in f32 and cast back to bf16:
//   %15021 = bf16( f32(%14963) * %1455 + f32(%15015) * %1456 )
// %1455 / %1456 are [1,1,4608,128] f32 tensors defined earlier -- presumably the rotary cos / sin
// tables (TODO confirm at their definition).
// The remaining ops start the same Shape/Gather sequence for %14976 (the norm_k output).
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15015 = torch.operator "onnx.Reshape"(%15007, %15014) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15016 = torch.operator "onnx.Cast"(%14963) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %15017 = torch.operator "onnx.Mul"(%15016, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15018 = torch.operator "onnx.Cast"(%15015) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15019 = torch.operator "onnx.Mul"(%15018, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15020 = torch.operator "onnx.Add"(%15017, %15019) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15021 = torch.operator "onnx.Cast"(%15020) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %15022 = torch.operator "onnx.Shape"(%14976) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15023 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15024 = torch.operator "onnx.Gather"(%15022, %15023) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15025 = torch.operator "onnx.Shape"(%14976) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15026 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15027 = torch.operator 
// norm_k side: unsqueeze the three gathered entries of Shape(%14976) and concatenate them with two [1]
// constants into the 5-element reshape target %15039 (mirrors %14994 on the norm_q side).
"onnx.Gather"(%15025, %15026) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15028 = torch.operator "onnx.Shape"(%14976) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15029 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15030 = torch.operator "onnx.Gather"(%15028, %15029) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15031 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19573_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15032 = torch.operator "onnx.Unsqueeze"(%15024, %15031) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15033 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19575_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15034 = torch.operator "onnx.Unsqueeze"(%15027, %15033) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15035 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19577_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15036 = torch.operator "onnx.Unsqueeze"(%15030, %15035) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15037 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15038 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15039 = torch.operator "onnx.Concat"(%15032, %15034, %15036, %15037, %15038) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
// Same pair handling as on the norm_q side, applied to %14976: reshape to [?,?,?,?,2] (%15040), Split on
// axis -1, squeeze both members (%15044, %15046), negate the second (%15047) and unsqueeze it again (%15049).
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %15040 = torch.operator "onnx.Reshape"(%14976, %15039) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15041 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %15042:2 = torch.operator "onnx.Split"(%15040, %15041) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %15043 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15044 = torch.operator "onnx.Squeeze"(%15042#0, %15043) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15045 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15046 = torch.operator "onnx.Squeeze"(%15042#1, %15045) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15047 = torch.operator "onnx.Neg"(%15046) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15048 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15049 = torch.operator "onnx.Unsqueeze"(%15047, %15048) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15050 = torch.operator "onnx.Constant"() {torch.onnx.value = 
// %15052 = Concat(unsqueeze(%15047), unsqueeze(%15044)) on axis -1, i.e. each (a, b) pair of %14976 becomes
// (-b, a). %15057 / %15059 rebuild a 4-element shape from Shape(%15052) plus a [1] constant, and %15060
// reshapes %15052 back to 4-D. %15061 begins the f32 combination with %1455 / %1456.
dense_resource<__single_transformer_blocks.24_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15051 = torch.operator "onnx.Unsqueeze"(%15044, %15050) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15052 = torch.operator "onnx.Concat"(%15049, %15051) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15053 = torch.operator "onnx.Shape"(%15052) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %15054 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15055 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15056 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15057 = torch.operator "onnx.Slice"(%15053, %15055, %15056, %15054) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15058 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15059 = torch.operator "onnx.Concat"(%15057, %15058) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15060 = torch.operator "onnx.Reshape"(%15052, %15059) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15061 = torch.operator "onnx.Cast"(%14976) {torch.onnx.to = 1 : si64} : 
// %15066 = bf16( f32(%14976) * %1455 + f32(%15060) * %1456 ), the counterpart of %15021.
// Attention scale input: %15070 slices a single entry out of Shape(%15021) (starts/ends are opaque
// constants; presumably the last dimension, 128 -- TODO confirm), which is cast to bf16 and
// square-rooted (%15072) before being cast to f32 for the division that follows.
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %15062 = torch.operator "onnx.Mul"(%15061, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15063 = torch.operator "onnx.Cast"(%15060) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15064 = torch.operator "onnx.Mul"(%15063, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15065 = torch.operator "onnx.Add"(%15062, %15064) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15066 = torch.operator "onnx.Cast"(%15065) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %15067 = torch.operator "onnx.Shape"(%15021) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %15068 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15069 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15070 = torch.operator "onnx.Slice"(%15067, %15068, %15069) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15071 = torch.operator "onnx.Cast"(%15070) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %15072 = torch.operator "onnx.Sqrt"(%15071) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15073 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %15074 = torch.operator "onnx.Cast"(%15072) {torch.onnx.to = 1 : si64} : 
// s = %15076 = bf16(%15073 / f32(%15072)). Its square root multiplies both MatMul operands
// (%15078 and %15080 are identical Sqrt ops on %15076, emitted twice by the exporter):
//   %15082 = (%15021 * sqrt(s)) @ (Transpose(%15066, perm [0,1,3,2]) * sqrt(s))   -> [?,?,4608,4608]
//   %15083 = Softmax(%15082, axis -1);   %15084 = %15083 @ %14950
// %15085 swaps axes 1 and 2 of the result (perm [0,2,1,3]); %15087 = %14923 * scalar const is one entry
// of the 3-element shape used by the 4-D -> 3-D Reshape that follows.
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %15075 = torch.operator "onnx.Div"(%15073, %15074) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %15076 = torch.operator "onnx.Cast"(%15075) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %15077 = torch.operator "onnx.Transpose"(%15066) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %15078 = torch.operator "onnx.Sqrt"(%15076) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15079 = torch.operator "onnx.Mul"(%15021, %15078) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %15080 = torch.operator "onnx.Sqrt"(%15076) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15081 = torch.operator "onnx.Mul"(%15077, %15080) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %15082 = torch.operator "onnx.MatMul"(%15079, %15081) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %15083 = torch.operator "onnx.Softmax"(%15082) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %15084 = torch.operator "onnx.MatMul"(%15083, %14950) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %15085 = torch.operator "onnx.Transpose"(%15084) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %15086 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15087 = torch.operator "onnx.Mul"(%14923, %15086) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
// %15093 = Concat(unsqueeze(%14910), const, unsqueeze(%15087)) is the 3-element target for reshaping the
// attention output %15085 to 3-D (%15094). %15095 is a bf16 -> bf16 Cast (no type change).
// %15096 concatenates it on axis 2 with %14907 ([?,4608,12288], defined earlier; presumably this block's
// MLP branch -- TODO confirm) before the MatMul with the [15360,3072] tensor %1093.
// %15098 unsqueezes %14881 to [?,1,?]; it is the left operand of the Mul that follows the projection.
!torch.vtensor<[],si64> %15088 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19630_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15089 = torch.operator "onnx.Unsqueeze"(%14910, %15088) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15090 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15091 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19633_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15092 = torch.operator "onnx.Unsqueeze"(%15087, %15091) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15093 = torch.operator "onnx.Concat"(%15089, %15090, %15092) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15094 = torch.operator "onnx.Reshape"(%15085, %15093) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %15095 = torch.operator "onnx.Cast"(%15094) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %15096 = torch.operator "onnx.Concat"(%15095, %14907) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %15097 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.24_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15098 = torch.operator "onnx.Unsqueeze"(%14881, %15097) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15099 = torch.operator "onnx.MatMul"(%15096, %1093) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
// End of single_transformer_blocks.24: %15100 = %613 + %15099 (%613 is a [3072] tensor, presumably the
// projection bias), %15101 = %15098 * %15100, and %15102 = %14863 + %15101 (%14863 is defined earlier).
// Start of single_transformer_blocks.25 norm: %15103 = Gemm(%1285, %619, %620) with transB = 1, i.e.
// %1285 [1,3072] times the transposed [9216,3072] tensor plus the [9216] tensor -> [1,9216].
// %15106..%15111 derive a length from one entry of Shape(%15103) using Add and Div with opaque constants.
!torch.vtensor<[?,4608,3072],bf16> %15100 = torch.operator "onnx.Add"(%613, %15099) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15101 = torch.operator "onnx.Mul"(%15098, %15100) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15102 = torch.operator "onnx.Add"(%14863, %15101) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15103 = torch.operator "onnx.Gemm"(%1285, %619, %620) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %15104 = torch.operator "onnx.Shape"(%15103) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %15105 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15106 = torch.operator "onnx.Gather"(%15104, %15105) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15107 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15108 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15109 = torch.operator "onnx.Add"(%15106, %15108) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15110 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15111 = torch.operator 
// Three consecutive Slices of %15103 along the axes given by %15105 (operands: data, starts, ends, axes):
//   %15114 = [%15107, %15113),  %15117 = [%15113, %15116),  %15120 = [%15116, %15119)
// where %15113, %15116 and %15119 are products of %15111 with opaque [1] constants.
// The trailing ops load the first of two 3072-element bf16 globals that become the scale and bias operands
// of the LayerNormalization that follows.
"onnx.Div"(%15109, %15110) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15112 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15113 = torch.operator "onnx.Mul"(%15111, %15112) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15114 = torch.operator "onnx.Slice"(%15103, %15107, %15113, %15105) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %15115 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15116 = torch.operator "onnx.Mul"(%15111, %15115) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15117 = torch.operator "onnx.Slice"(%15103, %15113, %15116, %15105) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %15118 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15119 = torch.operator "onnx.Mul"(%15111, %15118) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15120 = torch.operator "onnx.Slice"(%15103, %15116, %15119, %15105) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.252Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.25/norm/norm/Constant_attr__value" : tensor<3072xbf16> %15121 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.252Fnorm2Fnorm2FConstant_attr__value : 
// %15123 = LayerNormalization(%15102, scale %15121, bias %15122) over the last axis (axis = -1).
// Modulation: %15131 = %15123 * (unsqueeze(%15117) + scalar const %15126) + unsqueeze(%15114),
// i.e. the second slice of %15103 acts as a multiplicative term and the first slice as an additive term.
// The value of %15126 is an opaque dense_resource (presumably 1 -- TODO confirm).
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.252Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.25/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %15122 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.252Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %15123 = torch.operator "onnx.LayerNormalization"(%15102, %15121, %15122) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15124 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15125 = torch.operator "onnx.Unsqueeze"(%15117, %15124) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15126 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15127 = torch.operator "onnx.Add"(%15125, %15126) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %15128 = torch.operator "onnx.Mul"(%15123, %15127) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15129 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15130 = torch.operator "onnx.Unsqueeze"(%15114, %15129) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15131 = torch.operator "onnx.Add"(%15128, %15130) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15132 = torch.operator 
// MLP branch: x = %15133 = %621 + %15131 @ %1094 (%1094 is [3072,12288]).
// The ops that follow evaluate  c3 * x * (c2 + Tanh(c1 * (x + c0 * x^3)))  with x^3 built as x * (x * x);
// the final multiplication by c3 is on the next physical line. This has the form of the tanh approximation
// of GELU, but c0..c3 are opaque dense_resource scalars whose values are not visible here.
"onnx.MatMul"(%15131, %1094) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15133 = torch.operator "onnx.Add"(%621, %15132) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15134 = torch.operator "onnx.Mul"(%15133, %15133) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15135 = torch.operator "onnx.Mul"(%15133, %15134) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15136 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15137 = torch.operator "onnx.Mul"(%15136, %15135) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15138 = torch.operator "onnx.Add"(%15133, %15137) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15139 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15140 = torch.operator "onnx.Mul"(%15139, %15138) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15141 = torch.operator "onnx.Tanh"(%15140) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15142 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15143 = torch.operator "onnx.Add"(%15142, %15141) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15144 = torch.operator "onnx.Mul"(%15133, %15143) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15145 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15146 = torch.operator "onnx.Mul"(%15145, %15144) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15147 = torch.operator "onnx.Shape"(%15131) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %15148 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15149 = torch.operator "onnx.Gather"(%15147, %15148) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15150 = torch.operator "onnx.MatMul"(%15131, %1095) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15151 = torch.operator "onnx.Add"(%625, %15150) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15152 = torch.operator "onnx.MatMul"(%15131, %1096) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15153 = torch.operator "onnx.Add"(%626, %15152) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15154 = torch.operator "onnx.MatMul"(%15131, %1097) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15155 = torch.operator "onnx.Add"(%627, %15154) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15156 = torch.operator "onnx.Shape"(%15153) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %15157 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15158 = torch.operator "onnx.Gather"(%15156, %15157) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15159 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15160 = torch.operator "onnx.Div"(%15158, %15159) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15161 = torch.operator "onnx.Cast"(%15160) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15162 = torch.operator "onnx.Cast"(%15161) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15163 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19705_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15164 = torch.operator "onnx.Unsqueeze"(%15149, %15163) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15165 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15166 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15167 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19709_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15168 = torch.operator "onnx.Unsqueeze"(%15162, %15167) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15169 = torch.operator "onnx.Concat"(%15164, %15165, %15166, %15168) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15170 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19712_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15171 = torch.operator "onnx.Unsqueeze"(%15149, %15170) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15172 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15173 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15174 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19716_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15175 = torch.operator "onnx.Unsqueeze"(%15162, %15174) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15176 = torch.operator "onnx.Concat"(%15171, %15172, %15173, %15175) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15177 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19719_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15178 = torch.operator "onnx.Unsqueeze"(%15149, %15177) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15180 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15181 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19723_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15182 = torch.operator "onnx.Unsqueeze"(%15162, %15181) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15183 = torch.operator "onnx.Concat"(%15178, %15179, %15180, %15182) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15184 = torch.operator "onnx.Reshape"(%15151, %15169) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15185 = torch.operator "onnx.Transpose"(%15184) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15186 = torch.operator "onnx.Reshape"(%15153, %15176) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15187 = torch.operator "onnx.Transpose"(%15186) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15188 = torch.operator "onnx.Reshape"(%15155, %15183) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15189 = torch.operator "onnx.Transpose"(%15188) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15190 = torch.operator "onnx.Cast"(%15185) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15191 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15192 = torch.operator "onnx.Pow"(%15190, %15191) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15193 = torch.operator "onnx.ReduceMean"(%15192) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15194 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15195 = torch.operator "onnx.Add"(%15193, %15194) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15196 = torch.operator "onnx.Sqrt"(%15195) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15197 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15198 = torch.operator "onnx.Div"(%15197, %15196) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15199 = torch.operator "onnx.Cast"(%15185) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15200 = torch.operator "onnx.Mul"(%15199, %15198) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15201 = torch.operator "onnx.Cast"(%15200) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %15202 = torch.operator "onnx.Mul"(%15201, %623) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %15203 = torch.operator "onnx.Cast"(%15187) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15204 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15205 = torch.operator "onnx.Pow"(%15203, %15204) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15206 = torch.operator "onnx.ReduceMean"(%15205) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15207 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15208 = torch.operator "onnx.Add"(%15206, %15207) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15209 = torch.operator "onnx.Sqrt"(%15208) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15210 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15211 = torch.operator "onnx.Div"(%15210, %15209) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15212 = torch.operator "onnx.Cast"(%15187) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15213 = torch.operator "onnx.Mul"(%15212, %15211) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15214 = torch.operator "onnx.Cast"(%15213) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %15215 = torch.operator "onnx.Mul"(%15214, %624) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %15216 = torch.operator "onnx.Shape"(%15202) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15218 = torch.operator "onnx.Gather"(%15216, %15217) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15219 = torch.operator "onnx.Shape"(%15202) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15220 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15221 = torch.operator "onnx.Gather"(%15219, %15220) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15222 = torch.operator "onnx.Shape"(%15202) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15223 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15224 = torch.operator "onnx.Gather"(%15222, %15223) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15225 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19767_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15226 = torch.operator "onnx.Unsqueeze"(%15218, %15225) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15227 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19769_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15228 = torch.operator "onnx.Unsqueeze"(%15221, %15227) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19771_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15230 = torch.operator "onnx.Unsqueeze"(%15224, %15229) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15231 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.25_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15233 = torch.operator "onnx.Concat"(%15226, %15228, %15230, %15231, %15232) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %15234 = torch.operator "onnx.Reshape"(%15202, %15233) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15235 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %15236:2 = torch.operator "onnx.Split"(%15234, %15235) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %15237 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15238 = torch.operator "onnx.Squeeze"(%15236#0, %15237) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15239 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15240 = torch.operator "onnx.Squeeze"(%15236#1, %15239) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15241 = torch.operator "onnx.Neg"(%15240) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15242 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15243 = torch.operator "onnx.Unsqueeze"(%15241, %15242) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15244 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15245 = torch.operator "onnx.Unsqueeze"(%15238, %15244) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15246 = torch.operator "onnx.Concat"(%15243, %15245) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15247 = torch.operator "onnx.Shape"(%15246) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %15248 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15249 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15250 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15251 = torch.operator "onnx.Slice"(%15247, %15249, %15250, %15248) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15252 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15253 = torch.operator "onnx.Concat"(%15251, %15252) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15254 = torch.operator "onnx.Reshape"(%15246, %15253) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15255 = torch.operator "onnx.Cast"(%15202) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %15256 = torch.operator "onnx.Mul"(%15255, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15257 = torch.operator "onnx.Cast"(%15254) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15258 = torch.operator "onnx.Mul"(%15257, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15259 = torch.operator "onnx.Add"(%15256, %15258) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15260 = torch.operator "onnx.Cast"(%15259) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %15261 = torch.operator "onnx.Shape"(%15215) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15262 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15263 = torch.operator "onnx.Gather"(%15261, %15262) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15264 = torch.operator "onnx.Shape"(%15215) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15265 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15266 = torch.operator 
"onnx.Gather"(%15264, %15265) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15267 = torch.operator "onnx.Shape"(%15215) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15268 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15269 = torch.operator "onnx.Gather"(%15267, %15268) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15270 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19812_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15271 = torch.operator "onnx.Unsqueeze"(%15263, %15270) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15272 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19814_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15273 = torch.operator "onnx.Unsqueeze"(%15266, %15272) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15274 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19816_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15275 = torch.operator "onnx.Unsqueeze"(%15269, %15274) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15276 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15277 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15278 = torch.operator "onnx.Concat"(%15271, %15273, %15275, %15276, %15277) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %15279 = torch.operator "onnx.Reshape"(%15215, %15278) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15280 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %15281:2 = torch.operator "onnx.Split"(%15279, %15280) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %15282 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15283 = torch.operator "onnx.Squeeze"(%15281#0, %15282) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15284 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15285 = torch.operator "onnx.Squeeze"(%15281#1, %15284) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15286 = torch.operator "onnx.Neg"(%15285) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15287 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15288 = torch.operator "onnx.Unsqueeze"(%15286, %15287) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15289 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.25_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15290 = torch.operator "onnx.Unsqueeze"(%15283, %15289) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15291 = torch.operator "onnx.Concat"(%15288, %15290) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15292 = torch.operator "onnx.Shape"(%15291) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %15293 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15294 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15295 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15296 = torch.operator "onnx.Slice"(%15292, %15294, %15295, %15293) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15297 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15298 = torch.operator "onnx.Concat"(%15296, %15297) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15299 = torch.operator "onnx.Reshape"(%15291, %15298) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15300 = torch.operator "onnx.Cast"(%15215) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %15301 = torch.operator "onnx.Mul"(%15300, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15302 = torch.operator "onnx.Cast"(%15299) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15303 = torch.operator "onnx.Mul"(%15302, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15304 = torch.operator "onnx.Add"(%15301, %15303) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15305 = torch.operator "onnx.Cast"(%15304) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %15306 = torch.operator "onnx.Shape"(%15260) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %15307 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15308 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15309 = torch.operator "onnx.Slice"(%15306, %15307, %15308) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15310 = torch.operator "onnx.Cast"(%15309) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %15311 = torch.operator "onnx.Sqrt"(%15310) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15312 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %15313 = torch.operator "onnx.Cast"(%15311) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %15314 = torch.operator "onnx.Div"(%15312, %15313) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %15315 = torch.operator "onnx.Cast"(%15314) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %15316 = torch.operator "onnx.Transpose"(%15305) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %15317 = torch.operator "onnx.Sqrt"(%15315) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15318 = torch.operator "onnx.Mul"(%15260, %15317) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %15319 = torch.operator "onnx.Sqrt"(%15315) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15320 = torch.operator "onnx.Mul"(%15316, %15319) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %15321 = torch.operator "onnx.MatMul"(%15318, %15320) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %15322 = torch.operator "onnx.Softmax"(%15321) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %15323 = torch.operator "onnx.MatMul"(%15322, %15189) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %15324 = torch.operator "onnx.Transpose"(%15323) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %15325 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15326 = torch.operator "onnx.Mul"(%15162, %15325) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %15327 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19869_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15328 = torch.operator "onnx.Unsqueeze"(%15149, %15327) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15329 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15330 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19872_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15331 = torch.operator "onnx.Unsqueeze"(%15326, %15330) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15332 = torch.operator "onnx.Concat"(%15328, %15329, %15331) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15333 = torch.operator "onnx.Reshape"(%15324, %15332) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %15334 = torch.operator "onnx.Cast"(%15333) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %15335 = torch.operator "onnx.Concat"(%15334, %15146) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %15336 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.25_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15337 = torch.operator "onnx.Unsqueeze"(%15120, %15336) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15338 = torch.operator "onnx.MatMul"(%15335, %1098) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16>  // result type of %15338, whose op text begins on the previous line

// Add the [3072] vector %622, scale by %15337 ([?,1,?]), then add to %15102.
%15339 = torch.operator "onnx.Add"(%622, %15338) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%15340 = torch.operator "onnx.Mul"(%15337, %15339) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%15341 = torch.operator "onnx.Add"(%15102, %15340) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// Gemm with transB = 1 and alpha = beta = 1: %1285 * transpose(%628) + %629 -> [1,9216].
%15342 = torch.operator "onnx.Gemm"(%1285, %628, %629) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16>

// Chunk-size arithmetic on the Gemm output's shape:
//   %15350 = (shape[%15344] + %15347) / %15349
// The constants are dense resources whose values are not visible in this chunk.
%15343 = torch.operator "onnx.Shape"(%15342) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64>
%15344 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15345 = torch.operator "onnx.Gather"(%15343, %15344) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15346 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15347 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15348 = torch.operator "onnx.Add"(%15345, %15347) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15349 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15350 = torch.operator "onnx.Div"(%15348, %15349) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>

// Three back-to-back slices of %15342 along the axis in %15344 (the same value used as the Gather index):
//   %15353: start %15346, end %15352
//   %15356: start %15352, end %15355
//   %15359: start %15355, end %15358
// Each end is %15350 times a resource constant.
%15351 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15352 = torch.operator "onnx.Mul"(%15350, %15351) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15353 = torch.operator "onnx.Slice"(%15342, %15346, %15352, %15344) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%15354 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15355 = torch.operator "onnx.Mul"(%15350, %15354) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15356 = torch.operator "onnx.Slice"(%15342, %15352, %15355, %15344) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>
%15357 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15358 = torch.operator "onnx.Mul"(%15350, %15357) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15359 = torch.operator "onnx.Slice"(%15342, %15355, %15358, %15344) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16>

// Load a [3072] bf16 global and start converting it from a builtin tensor to a torch value tensor.
%_2Fsingle_transformer_blocks.262Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.26/norm/norm/Constant_attr__value" : tensor<3072xbf16>
%15360 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.262Fnorm2Fnorm2FConstant_attr__value :  // source and result types follow on the next line
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>  // source/result types of %15360, whose op text begins on the previous line

// FIX in this span: the rank-0 constants %15365, %15375, %15378 and %15381 had the attribute type
// `: tensor}` with no element type, which is not a valid builtin tensor type. Each is restored to
// `tensor<bf16>`, matching that op's own `!torch.vtensor<[],bf16>` result type.

// Second [3072] bf16 global, converted the same way as %15360.
%_2Fsingle_transformer_blocks.262Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.26/norm/norm/Constant_1_attr__value" : tensor<3072xbf16>
%15361 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.262Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>

// LayerNormalization of %15341 over the last axis, with %15360 and %15361 as the scale and bias operands.
%15362 = torch.operator "onnx.LayerNormalization"(%15341, %15360, %15361) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// Modulation: %15370 = %15362 * (unsqueeze(%15356) + c) + unsqueeze(%15353).
// c is the scalar resource constant %15365; its value is not visible here.
%15363 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15364 = torch.operator "onnx.Unsqueeze"(%15356, %15363) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%15365 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%15366 = torch.operator "onnx.Add"(%15364, %15365) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%15367 = torch.operator "onnx.Mul"(%15362, %15366) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%15368 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15369 = torch.operator "onnx.Unsqueeze"(%15353, %15368) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%15370 = torch.operator "onnx.Add"(%15367, %15369) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// Projection 3072 -> 12288 with %1099, plus the [12288] vector %630.
%15371 = torch.operator "onnx.MatMul"(%15370, %1099) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%15372 = torch.operator "onnx.Add"(%630, %15371) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>

// Element-wise activation on x = %15372, computed as
//   x * (c2 + tanh(c1 * (x + c0 * x*x*x)))   and later scaled by one more constant.
// NOTE(review): this has the form of the tanh-approximated GELU, but c0..c2 are dense resources
// whose values are not visible here - confirm against the resource blob.
%15373 = torch.operator "onnx.Mul"(%15372, %15372) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%15374 = torch.operator "onnx.Mul"(%15372, %15373) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%15375 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%15376 = torch.operator "onnx.Mul"(%15375, %15374) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%15377 = torch.operator "onnx.Add"(%15372, %15376) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%15378 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%15379 = torch.operator "onnx.Mul"(%15378, %15377) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%15380 = torch.operator "onnx.Tanh"(%15379) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%15381 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%15382 = torch.operator "onnx.Add"(%15381, %15380) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%15383 = torch.operator "onnx.Mul"(%15372, %15382) :  // operand and result types follow on the next line
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>  // types of %15383, whose op text begins on the previous line

// FIX in this span: the rank-0 constants %15384, %15387, %15396 and %15398 had the attribute type
// `: tensor}` with no element type, which is not a valid builtin tensor type. Each is restored from
// the op's own result type: `tensor<bf16>` for %15384, `tensor<si64>` for the other three.

// Final scalar scale of the activation output.
%15384 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%15385 = torch.operator "onnx.Mul"(%15384, %15383) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>

// One dimension of %15370's shape, selected by a resource-held index, kept as scalar %15388.
%15386 = torch.operator "onnx.Shape"(%15370) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%15387 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15388 = torch.operator "onnx.Gather"(%15386, %15387) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// Three 3072 -> 3072 projections of %15370, each followed by a [3072] vector add:
//   %15390 = %15370 x %1100 + %634
//   %15392 = %15370 x %1101 + %635
//   %15394 = %15370 x %1102 + %636
%15389 = torch.operator "onnx.MatMul"(%15370, %1100) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%15390 = torch.operator "onnx.Add"(%634, %15389) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%15391 = torch.operator "onnx.MatMul"(%15370, %1101) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%15392 = torch.operator "onnx.Add"(%635, %15391) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%15393 = torch.operator "onnx.MatMul"(%15370, %1102) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%15394 = torch.operator "onnx.Add"(%636, %15393) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>

// %15401 = (a dimension of %15392's shape) / (resource constant %15398).
// The two Casts are si64 -> si64 (to = 7).
%15395 = torch.operator "onnx.Shape"(%15392) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%15396 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15397 = torch.operator "onnx.Gather"(%15395, %15396) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%15398 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15399 = torch.operator "onnx.Div"(%15397, %15398) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%15400 = torch.operator "onnx.Cast"(%15399) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%15401 = torch.operator "onnx.Cast"(%15400) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// 4-element reshape target for %15390: [%15388, <const>, <const>, %15401].
%15402 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19944_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15403 = torch.operator "onnx.Unsqueeze"(%15388, %15402) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15404 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15405 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15406 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19948_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15407 = torch.operator "onnx.Unsqueeze"(%15401, %15406) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15408 = torch.operator "onnx.Concat"(%15403, %15404, %15405, %15407) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>,  // remaining operand types and the result type follow on the next line
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>  // rest of %15408's type signature; the op text begins on the previous line

// Second 4-element shape vector: [%15388, <const>, <const>, %15401].
%15409 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19951_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15410 = torch.operator "onnx.Unsqueeze"(%15388, %15409) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15411 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15412 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15413 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19955_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15414 = torch.operator "onnx.Unsqueeze"(%15401, %15413) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15415 = torch.operator "onnx.Concat"(%15410, %15411, %15412, %15414) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>

// Third shape vector, built the same way; its last element and the Concat are on the next line.
%15416 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19958_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15417 = torch.operator "onnx.Unsqueeze"(%15388, %15416) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15418 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15420 =  // defining op follows on the next line
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_19962_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>  // defining op of %15420, whose result name is on the previous line

// FIX in this span: the rank-0 constant %15430 had the attribute type `: tensor}` with no element
// type, which is not a valid builtin tensor type. It is restored to `tensor<f32>`, matching the op's
// `!torch.vtensor<[],f32>` result type.

// Finish the third shape vector: [%15388, <const>, <const>, %15401].
%15421 = torch.operator "onnx.Unsqueeze"(%15401, %15420) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15422 = torch.operator "onnx.Concat"(%15417, %15418, %15419, %15421) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>

// Reshape each of the three projections to rank 4 and swap axes 1 and 2 (perm = [0, 2, 1, 3]).
%15423 = torch.operator "onnx.Reshape"(%15390, %15408) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%15424 = torch.operator "onnx.Transpose"(%15423) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%15425 = torch.operator "onnx.Reshape"(%15392, %15415) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%15426 = torch.operator "onnx.Transpose"(%15425) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%15427 = torch.operator "onnx.Reshape"(%15394, %15422) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%15428 = torch.operator "onnx.Transpose"(%15427) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>

// Start of the f32 normalisation of %15424: upcast (to = 1), then Pow with a scalar exponent.
// NOTE(review): the exponent is a dense resource; presumably 2.0 - confirm.
%15429 = torch.operator "onnx.Cast"(%15424) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%15430 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%15431 = torch.operator "onnx.Pow"(%15429, %15430) :  // operand and result types follow on the next line
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>  // types of %15431, whose op text begins on the previous line

// FIX in this span: the rank-0 constants %15433, %15436 and %15443 had the attribute type `: tensor}`
// with no element type, which is not a valid builtin tensor type. Each is restored to `tensor<f32>`,
// matching that op's `!torch.vtensor<[],f32>` result type.

// Normalisation factor in f32: %15437 = c2 / sqrt(mean(%15431, last axis, keepdims) + c1).
// NOTE(review): c1 and c2 are dense resources (presumably an epsilon and 1.0) - confirm.
%15432 = torch.operator "onnx.ReduceMean"(%15431) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%15433 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%15434 = torch.operator "onnx.Add"(%15432, %15433) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%15435 = torch.operator "onnx.Sqrt"(%15434) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%15436 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%15437 = torch.operator "onnx.Div"(%15436, %15435) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>

// Apply the factor to an f32 copy of %15424, cast back to bf16 (to = 16), then scale by the [128] vector %632.
%15438 = torch.operator "onnx.Cast"(%15424) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%15439 = torch.operator "onnx.Mul"(%15438, %15437) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%15440 = torch.operator "onnx.Cast"(%15439) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%15441 = torch.operator "onnx.Mul"(%15440, %632) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>

// The same normalisation starts for %15426: upcast, then Pow with a scalar exponent.
%15442 = torch.operator "onnx.Cast"(%15426) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%15443 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%15444 = torch.operator "onnx.Pow"(%15442, %15443) :  // operand and result types follow on the next line
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>  // types of %15444, whose op text begins on the previous line

// FIX in this span: the rank-0 constants %15446, %15449 and %15456 had the attribute type `: tensor}`
// with no element type, which is not a valid builtin tensor type. Each is restored from the op's own
// result type: `tensor<f32>` for %15446 and %15449, `tensor<si64>` for %15456.

// Normalisation factor in f32: %15450 = c2 / sqrt(mean(%15444, last axis, keepdims) + c1).
// NOTE(review): c1 and c2 are dense resources (presumably an epsilon and 1.0) - confirm.
%15445 = torch.operator "onnx.ReduceMean"(%15444) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%15446 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%15447 = torch.operator "onnx.Add"(%15445, %15446) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%15448 = torch.operator "onnx.Sqrt"(%15447) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%15449 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%15450 = torch.operator "onnx.Div"(%15449, %15448) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>

// Apply the factor to an f32 copy of %15426, cast back to bf16, then scale by the [128] vector %633.
%15451 = torch.operator "onnx.Cast"(%15426) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%15452 = torch.operator "onnx.Mul"(%15451, %15450) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%15453 = torch.operator "onnx.Cast"(%15452) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%15454 = torch.operator "onnx.Mul"(%15453, %633) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>

// Begin reading individual dimensions of %15441's shape (index held in a dense resource).
%15455 = torch.operator "onnx.Shape"(%15441) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%15456 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15457 = torch.operator "onnx.Gather"(%15455, %15456) {torch.onnx.axis = 0 : si64} :  // operand and result types follow on the next line
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>  // types of %15457, whose op text begins on the previous line

// FIX in this span: the rank-0 constants %15459 and %15462 had the attribute type `: tensor}` with no
// element type, which is not a valid builtin tensor type. Both are restored to `tensor<si64>`,
// matching their `!torch.vtensor<[],si64>` result types.

// Two more scalar dimensions of %15441's shape (Shape is recomputed for each Gather).
%15458 = torch.operator "onnx.Shape"(%15441) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%15459 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15460 = torch.operator "onnx.Gather"(%15458, %15459) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%15461 = torch.operator "onnx.Shape"(%15441) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%15462 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15463 = torch.operator "onnx.Gather"(%15461, %15462) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// Lift the three gathered dimensions to rank 1 so they can be concatenated into a shape vector.
%15464 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20006_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15465 = torch.operator "onnx.Unsqueeze"(%15457, %15464) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15466 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20008_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15467 = torch.operator "onnx.Unsqueeze"(%15460, %15466) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15468 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20010_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15469 = torch.operator "onnx.Unsqueeze"(%15463, %15468) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15470 = torch.operator "onnx.Constant"() {torch.onnx.value =  // attribute value and types follow on the next line
dense_resource<__single_transformer_blocks.26_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>  // remainder of %15470, whose op text begins on the previous line

// 5-element shape [%15465, %15467, %15469, <const>, <const>]; reshaping %15441 with it gives a trailing dim of 2.
%15471 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15472 = torch.operator "onnx.Concat"(%15465, %15467, %15469, %15470, %15471) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>
%15473 = torch.operator "onnx.Reshape"(%15441, %15472) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16>

// Split the trailing pair into two [..., 1] halves and squeeze each to rank 4.
%15474 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64>
%15475:2 = torch.operator "onnx.Split"(%15473, %15474) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>)
%15476 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15477 = torch.operator "onnx.Squeeze"(%15475#0, %15476) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%15478 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15479 = torch.operator "onnx.Squeeze"(%15475#1, %15478) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>

// Negate the second half.
%15480 = torch.operator "onnx.Neg"(%15479) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%15481 =  // defining op follows on the next line
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>  // defining op of %15481, whose result name is on the previous line

// Re-pair the halves in swapped order: concat(-second, first) on the last axis.
%15482 = torch.operator "onnx.Unsqueeze"(%15480, %15481) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%15483 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15484 = torch.operator "onnx.Unsqueeze"(%15477, %15483) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%15485 = torch.operator "onnx.Concat"(%15482, %15484) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16>

// Target shape for flattening back to rank 4: three entries sliced from shape(%15485) plus one
// resource constant. Slice operands are (data, starts = %15488, ends = %15489, axes = %15487).
%15486 = torch.operator "onnx.Shape"(%15485) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64>
%15487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15488 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15489 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15490 = torch.operator "onnx.Slice"(%15486, %15488, %15489, %15487) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%15491 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15492 = torch.operator "onnx.Concat"(%15490, %15491)  // attributes and types follow on the next line
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>  // attributes and types of %15492, whose op text begins on the previous line

// FIX in this span: the rank-0 constants %15501, %15504 and %15507 had the attribute type `: tensor}`
// with no element type, which is not a valid builtin tensor type. Each is restored to `tensor<si64>`,
// matching that op's `!torch.vtensor<[],si64>` result type.

// Flatten the swapped and negated pairs (%15485) back to rank 4.
%15493 = torch.operator "onnx.Reshape"(%15485, %15492) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>

// In f32: %15441 * %1455 + %15493 * %1456, then cast back to bf16.
// NOTE(review): %1455 and %1456 are [1,1,4608,128] f32 tensors defined outside this chunk;
// presumably the rotary cos/sin tables - confirm at their definitions.
%15494 = torch.operator "onnx.Cast"(%15441) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%15495 = torch.operator "onnx.Mul"(%15494, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%15496 = torch.operator "onnx.Cast"(%15493) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%15497 = torch.operator "onnx.Mul"(%15496, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%15498 = torch.operator "onnx.Add"(%15495, %15497) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%15499 = torch.operator "onnx.Cast"(%15498) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>

// Same pairing procedure for %15454: read three scalar dimensions of its shape.
%15500 = torch.operator "onnx.Shape"(%15454) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%15501 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15502 = torch.operator "onnx.Gather"(%15500, %15501) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%15503 = torch.operator "onnx.Shape"(%15454) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%15504 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15505 = torch.operator "onnx.Gather"(%15503, %15504) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%15506 = torch.operator "onnx.Shape"(%15454) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%15507 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%15508 = torch.operator "onnx.Gather"(%15506, %15507) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>

// Lift them to rank 1 and concatenate with two resource constants into a 5-element shape.
%15509 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20051_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15510 = torch.operator "onnx.Unsqueeze"(%15502, %15509) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15511 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20053_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15512 = torch.operator "onnx.Unsqueeze"(%15505, %15511) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15513 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20055_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15514 = torch.operator "onnx.Unsqueeze"(%15508, %15513) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15515 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15516 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15517 = torch.operator "onnx.Concat"(%15510, %15512, %15514, %15515, %15516) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>,  // remaining operand types and the result type follow on the next line
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64>  // rest of %15517's type signature; the op text begins on the previous line

// View %15454 with a trailing dimension of 2, then split that pair into two [..., 1] halves.
%15518 = torch.operator "onnx.Reshape"(%15454, %15517) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16>
%15519 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64>
%15520:2 = torch.operator "onnx.Split"(%15518, %15519) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>)

// Squeeze both halves to rank 4.
%15521 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15522 = torch.operator "onnx.Squeeze"(%15520#0, %15521) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%15523 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15524 = torch.operator "onnx.Squeeze"(%15520#1, %15523) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16>

// Negate the second half and give it back a trailing unit axis.
%15525 = torch.operator "onnx.Neg"(%15524) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%15526 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15527 = torch.operator "onnx.Unsqueeze"(%15525, %15526) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%15528 = torch.operator "onnx.Constant"() {torch.onnx.value =  // attribute value and types follow on the next line
dense_resource<__single_transformer_blocks.26_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>  // remainder of %15528, whose op text begins on the previous line

// Re-pair the halves in swapped order: concat(-second, first) on the last axis.
%15529 = torch.operator "onnx.Unsqueeze"(%15522, %15528) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16>
%15530 = torch.operator "onnx.Concat"(%15527, %15529) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16>

// Target shape for flattening back to rank 4: three entries sliced from shape(%15530) plus one
// resource constant. Slice operands are (data, starts = %15533, ends = %15534, axes = %15532).
%15531 = torch.operator "onnx.Shape"(%15530) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64>
%15532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15533 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15534 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15535 = torch.operator "onnx.Slice"(%15531, %15533, %15534, %15532) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64>
%15536 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15537 = torch.operator "onnx.Concat"(%15535, %15536) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
%15538 = torch.operator "onnx.Reshape"(%15530, %15537) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>

// Upcast %15454 to f32 (to = 1) for the combine step on the next line.
%15539 = torch.operator "onnx.Cast"(%15454) {torch.onnx.to = 1 : si64} :  // operand and result types follow on the next line
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>  // types of %15539, whose op text begins on the previous line

// In f32: %15539 * %1455 + %15538 * %1456, then cast back to bf16 (to = 16).
// NOTE(review): %1455 and %1456 are [1,1,4608,128] f32 tensors defined outside this chunk;
// presumably the rotary cos/sin tables - confirm at their definitions.
%15540 = torch.operator "onnx.Mul"(%15539, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%15541 = torch.operator "onnx.Cast"(%15538) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%15542 = torch.operator "onnx.Mul"(%15541, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%15543 = torch.operator "onnx.Add"(%15540, %15542) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%15544 = torch.operator "onnx.Cast"(%15543) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>

// Scale factor: slice one entry out of shape(%15499) (starts/ends are resources), cast it to bf16,
// and take its square root; the division by this value happens on the next line.
%15545 = torch.operator "onnx.Shape"(%15499) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64>
%15546 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15547 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%15548 = torch.operator "onnx.Slice"(%15545, %15546, %15547) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%15549 = torch.operator "onnx.Cast"(%15548) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16>
%15550 = torch.operator "onnx.Sqrt"(%15549) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%15551 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32>
%15552 = torch.operator "onnx.Cast"(%15550) {torch.onnx.to = 1 : si64} :  // operand and result types follow on the next line
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %15553 = torch.operator "onnx.Div"(%15551, %15552) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %15554 = torch.operator "onnx.Cast"(%15553) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %15555 = torch.operator "onnx.Transpose"(%15544) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %15556 = torch.operator "onnx.Sqrt"(%15554) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15557 = torch.operator "onnx.Mul"(%15499, %15556) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %15558 = torch.operator "onnx.Sqrt"(%15554) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15559 = torch.operator "onnx.Mul"(%15555, %15558) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %15560 = torch.operator "onnx.MatMul"(%15557, %15559) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %15561 = torch.operator "onnx.Softmax"(%15560) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %15562 = torch.operator "onnx.MatMul"(%15561, %15428) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %15563 = torch.operator "onnx.Transpose"(%15562) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %15564 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15565 = torch.operator "onnx.Mul"(%15401, %15564) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %15566 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20108_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15567 = torch.operator "onnx.Unsqueeze"(%15388, %15566) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15568 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15569 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20111_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15570 = torch.operator "onnx.Unsqueeze"(%15565, %15569) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15571 = torch.operator "onnx.Concat"(%15567, %15568, %15570) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15572 = torch.operator "onnx.Reshape"(%15563, %15571) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %15573 = torch.operator "onnx.Cast"(%15572) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %15574 = torch.operator "onnx.Concat"(%15573, %15385) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %15575 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.26_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15576 = torch.operator "onnx.Unsqueeze"(%15359, %15575) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15577 = torch.operator "onnx.MatMul"(%15574, %1103) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %15578 = torch.operator "onnx.Add"(%631, %15577) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15579 = torch.operator "onnx.Mul"(%15576, %15578) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15580 = torch.operator "onnx.Add"(%15341, %15579) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15581 = torch.operator "onnx.Gemm"(%1285, %637, %638) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %15582 = torch.operator "onnx.Shape"(%15581) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %15583 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15584 = torch.operator "onnx.Gather"(%15582, %15583) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15585 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15586 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15587 = torch.operator "onnx.Add"(%15584, %15586) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15588 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15589 = torch.operator 
"onnx.Div"(%15587, %15588) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15590 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15591 = torch.operator "onnx.Mul"(%15589, %15590) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15592 = torch.operator "onnx.Slice"(%15581, %15585, %15591, %15583) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %15593 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15594 = torch.operator "onnx.Mul"(%15589, %15593) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15595 = torch.operator "onnx.Slice"(%15581, %15591, %15594, %15583) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %15596 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15597 = torch.operator "onnx.Mul"(%15589, %15596) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15598 = torch.operator "onnx.Slice"(%15581, %15594, %15597, %15583) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.272Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.27/norm/norm/Constant_attr__value" : tensor<3072xbf16> %15599 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.272Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.272Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.27/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %15600 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.272Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %15601 = torch.operator "onnx.LayerNormalization"(%15580, %15599, %15600) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15602 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15603 = torch.operator "onnx.Unsqueeze"(%15595, %15602) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15604 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15605 = torch.operator "onnx.Add"(%15603, %15604) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %15606 = torch.operator "onnx.Mul"(%15601, %15605) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15607 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15608 = torch.operator "onnx.Unsqueeze"(%15592, %15607) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15609 = torch.operator "onnx.Add"(%15606, %15608) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15610 = torch.operator 
"onnx.MatMul"(%15609, %1104) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15611 = torch.operator "onnx.Add"(%639, %15610) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15612 = torch.operator "onnx.Mul"(%15611, %15611) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15613 = torch.operator "onnx.Mul"(%15611, %15612) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15614 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15615 = torch.operator "onnx.Mul"(%15614, %15613) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15616 = torch.operator "onnx.Add"(%15611, %15615) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15617 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15618 = torch.operator "onnx.Mul"(%15617, %15616) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15619 = torch.operator "onnx.Tanh"(%15618) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15620 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15621 = torch.operator "onnx.Add"(%15620, %15619) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15622 = torch.operator "onnx.Mul"(%15611, %15621) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15623 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15624 = torch.operator "onnx.Mul"(%15623, %15622) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15625 = torch.operator "onnx.Shape"(%15609) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %15626 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15627 = torch.operator "onnx.Gather"(%15625, %15626) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15628 = torch.operator "onnx.MatMul"(%15609, %1105) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15629 = torch.operator "onnx.Add"(%643, %15628) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15630 = torch.operator "onnx.MatMul"(%15609, %1106) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15631 = torch.operator "onnx.Add"(%644, %15630) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15632 = torch.operator "onnx.MatMul"(%15609, %1107) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15633 = torch.operator "onnx.Add"(%645, %15632) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15634 = torch.operator "onnx.Shape"(%15631) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %15635 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15636 = torch.operator "onnx.Gather"(%15634, %15635) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15637 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15638 = torch.operator "onnx.Div"(%15636, %15637) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15639 = torch.operator "onnx.Cast"(%15638) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15640 = torch.operator "onnx.Cast"(%15639) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15641 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20183_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15642 = torch.operator "onnx.Unsqueeze"(%15627, %15641) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15643 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15644 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15645 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20187_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15646 = torch.operator "onnx.Unsqueeze"(%15640, %15645) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15647 = torch.operator "onnx.Concat"(%15642, %15643, %15644, %15646) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15648 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20190_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15649 = torch.operator "onnx.Unsqueeze"(%15627, %15648) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15650 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15651 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15652 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20194_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15653 = torch.operator "onnx.Unsqueeze"(%15640, %15652) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15654 = torch.operator "onnx.Concat"(%15649, %15650, %15651, %15653) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15655 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20197_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15656 = torch.operator "onnx.Unsqueeze"(%15627, %15655) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15657 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15658 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15659 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20201_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15660 = torch.operator "onnx.Unsqueeze"(%15640, %15659) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15661 = torch.operator "onnx.Concat"(%15656, %15657, %15658, %15660) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15662 = torch.operator "onnx.Reshape"(%15629, %15647) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15663 = torch.operator "onnx.Transpose"(%15662) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15664 = torch.operator "onnx.Reshape"(%15631, %15654) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15665 = torch.operator "onnx.Transpose"(%15664) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15666 = torch.operator "onnx.Reshape"(%15633, %15661) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15667 = torch.operator "onnx.Transpose"(%15666) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15668 = torch.operator "onnx.Cast"(%15663) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15669 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15670 = torch.operator "onnx.Pow"(%15668, %15669) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15671 = torch.operator "onnx.ReduceMean"(%15670) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15672 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15673 = torch.operator "onnx.Add"(%15671, %15672) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15674 = torch.operator "onnx.Sqrt"(%15673) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15675 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15676 = torch.operator "onnx.Div"(%15675, %15674) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15677 = torch.operator "onnx.Cast"(%15663) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15678 = torch.operator "onnx.Mul"(%15677, %15676) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15679 = torch.operator "onnx.Cast"(%15678) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %15680 = torch.operator "onnx.Mul"(%15679, %641) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %15681 = torch.operator "onnx.Cast"(%15665) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15682 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15683 = torch.operator "onnx.Pow"(%15681, %15682) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15684 = torch.operator "onnx.ReduceMean"(%15683) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15685 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15686 = torch.operator "onnx.Add"(%15684, %15685) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15687 = torch.operator "onnx.Sqrt"(%15686) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15688 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15689 = torch.operator "onnx.Div"(%15688, %15687) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15690 = torch.operator "onnx.Cast"(%15665) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15691 = torch.operator "onnx.Mul"(%15690, %15689) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15692 = torch.operator "onnx.Cast"(%15691) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %15693 = torch.operator "onnx.Mul"(%15692, %642) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %15694 = torch.operator "onnx.Shape"(%15680) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15695 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15696 = torch.operator "onnx.Gather"(%15694, %15695) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15697 = torch.operator "onnx.Shape"(%15680) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15699 = torch.operator "onnx.Gather"(%15697, %15698) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15700 = torch.operator "onnx.Shape"(%15680) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15701 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15702 = torch.operator "onnx.Gather"(%15700, %15701) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15703 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20245_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15704 = torch.operator "onnx.Unsqueeze"(%15696, %15703) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15705 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20247_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15706 = torch.operator "onnx.Unsqueeze"(%15699, %15705) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15707 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20249_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15708 = torch.operator "onnx.Unsqueeze"(%15702, %15707) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15709 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.27_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15710 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15711 = torch.operator "onnx.Concat"(%15704, %15706, %15708, %15709, %15710) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %15712 = torch.operator "onnx.Reshape"(%15680, %15711) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15713 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %15714:2 = torch.operator "onnx.Split"(%15712, %15713) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %15715 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15716 = torch.operator "onnx.Squeeze"(%15714#0, %15715) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15717 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15718 = torch.operator "onnx.Squeeze"(%15714#1, %15717) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15719 = torch.operator "onnx.Neg"(%15718) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15720 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15721 = torch.operator "onnx.Unsqueeze"(%15719, %15720) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15722 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15723 = torch.operator "onnx.Unsqueeze"(%15716, %15722) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15724 = torch.operator "onnx.Concat"(%15721, %15723) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15725 = torch.operator "onnx.Shape"(%15724) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %15726 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15727 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15728 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15729 = torch.operator "onnx.Slice"(%15725, %15727, %15728, %15726) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15730 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15731 = torch.operator "onnx.Concat"(%15729, %15730) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15732 = torch.operator "onnx.Reshape"(%15724, %15731) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15733 = torch.operator "onnx.Cast"(%15680) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %15734 = torch.operator "onnx.Mul"(%15733, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15735 = torch.operator "onnx.Cast"(%15732) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15736 = torch.operator "onnx.Mul"(%15735, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15737 = torch.operator "onnx.Add"(%15734, %15736) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15738 = torch.operator "onnx.Cast"(%15737) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %15739 = torch.operator "onnx.Shape"(%15693) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15740 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15741 = torch.operator "onnx.Gather"(%15739, %15740) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15742 = torch.operator "onnx.Shape"(%15693) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15743 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15744 = torch.operator 
"onnx.Gather"(%15742, %15743) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15745 = torch.operator "onnx.Shape"(%15693) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15746 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15747 = torch.operator "onnx.Gather"(%15745, %15746) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15748 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20290_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15749 = torch.operator "onnx.Unsqueeze"(%15741, %15748) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15750 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20292_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15751 = torch.operator "onnx.Unsqueeze"(%15744, %15750) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15752 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20294_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15753 = torch.operator "onnx.Unsqueeze"(%15747, %15752) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15754 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15755 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15756 = torch.operator "onnx.Concat"(%15749, %15751, %15753, %15754, %15755) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %15757 = torch.operator "onnx.Reshape"(%15693, %15756) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15758 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %15759:2 = torch.operator "onnx.Split"(%15757, %15758) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %15760 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15761 = torch.operator "onnx.Squeeze"(%15759#0, %15760) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15762 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15763 = torch.operator "onnx.Squeeze"(%15759#1, %15762) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15764 = torch.operator "onnx.Neg"(%15763) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15765 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15766 = torch.operator "onnx.Unsqueeze"(%15764, %15765) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15767 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.27_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15768 = torch.operator "onnx.Unsqueeze"(%15761, %15767) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15769 = torch.operator "onnx.Concat"(%15766, %15768) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15770 = torch.operator "onnx.Shape"(%15769) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %15771 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15772 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15773 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15774 = torch.operator "onnx.Slice"(%15770, %15772, %15773, %15771) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15775 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15776 = torch.operator "onnx.Concat"(%15774, %15775) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15777 = torch.operator "onnx.Reshape"(%15769, %15776) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15778 = torch.operator "onnx.Cast"(%15693) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %15779 = torch.operator "onnx.Mul"(%15778, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15780 = torch.operator "onnx.Cast"(%15777) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15781 = torch.operator "onnx.Mul"(%15780, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15782 = torch.operator "onnx.Add"(%15779, %15781) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15783 = torch.operator "onnx.Cast"(%15782) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %15784 = torch.operator "onnx.Shape"(%15738) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %15785 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15786 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15787 = torch.operator "onnx.Slice"(%15784, %15785, %15786) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15788 = torch.operator "onnx.Cast"(%15787) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %15789 = torch.operator "onnx.Sqrt"(%15788) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15790 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %15791 = torch.operator "onnx.Cast"(%15789) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %15792 = torch.operator "onnx.Div"(%15790, %15791) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %15793 = torch.operator "onnx.Cast"(%15792) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %15794 = torch.operator "onnx.Transpose"(%15783) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %15795 = torch.operator "onnx.Sqrt"(%15793) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15796 = torch.operator "onnx.Mul"(%15738, %15795) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %15797 = torch.operator "onnx.Sqrt"(%15793) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %15798 = torch.operator "onnx.Mul"(%15794, %15797) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %15799 = torch.operator "onnx.MatMul"(%15796, %15798) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %15800 = torch.operator "onnx.Softmax"(%15799) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %15801 = torch.operator "onnx.MatMul"(%15800, %15667) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %15802 = torch.operator "onnx.Transpose"(%15801) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %15803 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15804 = torch.operator "onnx.Mul"(%15640, %15803) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %15805 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20347_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15806 = torch.operator "onnx.Unsqueeze"(%15627, %15805) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15807 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15808 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20350_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15809 = torch.operator "onnx.Unsqueeze"(%15804, %15808) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15810 = torch.operator "onnx.Concat"(%15806, %15807, %15809) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15811 = torch.operator "onnx.Reshape"(%15802, %15810) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %15812 = torch.operator "onnx.Cast"(%15811) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %15813 = torch.operator "onnx.Concat"(%15812, %15624) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %15814 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.27_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15815 = torch.operator "onnx.Unsqueeze"(%15598, %15814) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15816 = torch.operator "onnx.MatMul"(%15813, %1108) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %15817 = torch.operator "onnx.Add"(%640, %15816) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15818 = torch.operator "onnx.Mul"(%15815, %15817) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15819 = torch.operator "onnx.Add"(%15580, %15818) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15820 = torch.operator "onnx.Gemm"(%1285, %646, %647) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %15821 = torch.operator "onnx.Shape"(%15820) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %15822 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15823 = torch.operator "onnx.Gather"(%15821, %15822) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15824 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15825 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15826 = torch.operator "onnx.Add"(%15823, %15825) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15827 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15828 = torch.operator 
"onnx.Div"(%15826, %15827) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15829 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15830 = torch.operator "onnx.Mul"(%15828, %15829) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15831 = torch.operator "onnx.Slice"(%15820, %15824, %15830, %15822) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %15832 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15833 = torch.operator "onnx.Mul"(%15828, %15832) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15834 = torch.operator "onnx.Slice"(%15820, %15830, %15833, %15822) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %15835 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15836 = torch.operator "onnx.Mul"(%15828, %15835) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15837 = torch.operator "onnx.Slice"(%15820, %15833, %15836, %15822) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.282Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.28/norm/norm/Constant_attr__value" : tensor<3072xbf16> %15838 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.282Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.282Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.28/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %15839 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.282Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %15840 = torch.operator "onnx.LayerNormalization"(%15819, %15838, %15839) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15841 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15842 = torch.operator "onnx.Unsqueeze"(%15834, %15841) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15843 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15844 = torch.operator "onnx.Add"(%15842, %15843) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %15845 = torch.operator "onnx.Mul"(%15840, %15844) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15846 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15847 = torch.operator "onnx.Unsqueeze"(%15831, %15846) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %15848 = torch.operator "onnx.Add"(%15845, %15847) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15849 = torch.operator 
"onnx.MatMul"(%15848, %1109) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15850 = torch.operator "onnx.Add"(%648, %15849) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15851 = torch.operator "onnx.Mul"(%15850, %15850) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15852 = torch.operator "onnx.Mul"(%15850, %15851) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15853 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15854 = torch.operator "onnx.Mul"(%15853, %15852) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15855 = torch.operator "onnx.Add"(%15850, %15854) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15856 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15857 = torch.operator "onnx.Mul"(%15856, %15855) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15858 = torch.operator "onnx.Tanh"(%15857) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15859 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15860 = torch.operator "onnx.Add"(%15859, %15858) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15861 = torch.operator "onnx.Mul"(%15850, %15860) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15862 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %15863 = torch.operator "onnx.Mul"(%15862, %15861) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %15864 = torch.operator "onnx.Shape"(%15848) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %15865 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15866 = torch.operator "onnx.Gather"(%15864, %15865) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15867 = torch.operator "onnx.MatMul"(%15848, %1110) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15868 = torch.operator "onnx.Add"(%652, %15867) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15869 = torch.operator "onnx.MatMul"(%15848, %1111) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15870 = torch.operator "onnx.Add"(%653, %15869) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15871 = torch.operator "onnx.MatMul"(%15848, %1112) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15872 = torch.operator "onnx.Add"(%654, %15871) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %15873 = torch.operator "onnx.Shape"(%15870) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %15874 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15875 = torch.operator "onnx.Gather"(%15873, %15874) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15876 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15877 = torch.operator "onnx.Div"(%15875, %15876) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15878 = torch.operator "onnx.Cast"(%15877) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15879 = torch.operator "onnx.Cast"(%15878) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15880 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20422_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15881 = torch.operator "onnx.Unsqueeze"(%15866, %15880) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15882 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15883 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15884 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20426_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15885 = torch.operator "onnx.Unsqueeze"(%15879, %15884) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15886 = torch.operator "onnx.Concat"(%15881, %15882, %15883, %15885) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15887 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20429_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15888 = torch.operator "onnx.Unsqueeze"(%15866, %15887) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15889 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15890 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15891 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20433_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15892 = torch.operator "onnx.Unsqueeze"(%15879, %15891) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15893 = torch.operator "onnx.Concat"(%15888, %15889, %15890, %15892) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15894 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20436_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15895 = torch.operator "onnx.Unsqueeze"(%15866, %15894) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15896 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15897 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15898 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20440_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15899 = torch.operator "onnx.Unsqueeze"(%15879, %15898) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15900 = torch.operator "onnx.Concat"(%15895, %15896, %15897, %15899) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15901 = torch.operator "onnx.Reshape"(%15868, %15886) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15902 = torch.operator "onnx.Transpose"(%15901) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15903 = torch.operator "onnx.Reshape"(%15870, %15893) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15904 = torch.operator "onnx.Transpose"(%15903) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15905 = torch.operator "onnx.Reshape"(%15872, %15900) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15906 = torch.operator "onnx.Transpose"(%15905) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15907 = torch.operator "onnx.Cast"(%15902) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15908 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15909 = torch.operator "onnx.Pow"(%15907, %15908) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15910 = torch.operator "onnx.ReduceMean"(%15909) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15911 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15912 = torch.operator "onnx.Add"(%15910, %15911) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15913 = torch.operator "onnx.Sqrt"(%15912) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15914 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15915 = torch.operator "onnx.Div"(%15914, %15913) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15916 = torch.operator "onnx.Cast"(%15902) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15917 = torch.operator "onnx.Mul"(%15916, %15915) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15918 = torch.operator "onnx.Cast"(%15917) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %15919 = torch.operator "onnx.Mul"(%15918, %650) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %15920 = torch.operator "onnx.Cast"(%15904) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15921 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15922 = torch.operator "onnx.Pow"(%15920, %15921) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15923 = torch.operator "onnx.ReduceMean"(%15922) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15924 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15925 = torch.operator "onnx.Add"(%15923, %15924) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15926 = torch.operator "onnx.Sqrt"(%15925) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15927 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %15928 = torch.operator "onnx.Div"(%15927, %15926) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %15929 = torch.operator "onnx.Cast"(%15904) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15930 = torch.operator "onnx.Mul"(%15929, %15928) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %15931 = torch.operator "onnx.Cast"(%15930) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %15932 = torch.operator "onnx.Mul"(%15931, %651) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %15933 = torch.operator "onnx.Shape"(%15919) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15934 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15935 = torch.operator "onnx.Gather"(%15933, %15934) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15936 = torch.operator "onnx.Shape"(%15919) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15937 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15938 = torch.operator "onnx.Gather"(%15936, %15937) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15939 = torch.operator "onnx.Shape"(%15919) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15940 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15941 = torch.operator "onnx.Gather"(%15939, %15940) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15942 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20484_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15943 = torch.operator "onnx.Unsqueeze"(%15935, %15942) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15944 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20486_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15945 = torch.operator "onnx.Unsqueeze"(%15938, %15944) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15946 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20488_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15947 = torch.operator "onnx.Unsqueeze"(%15941, %15946) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15948 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.28_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15950 = torch.operator "onnx.Concat"(%15943, %15945, %15947, %15948, %15949) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %15951 = torch.operator "onnx.Reshape"(%15919, %15950) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15952 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %15953:2 = torch.operator "onnx.Split"(%15951, %15952) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %15954 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15955 = torch.operator "onnx.Squeeze"(%15953#0, %15954) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15956 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15957 = torch.operator "onnx.Squeeze"(%15953#1, %15956) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15958 = torch.operator "onnx.Neg"(%15957) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %15959 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15960 = torch.operator "onnx.Unsqueeze"(%15958, %15959) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15961 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15962 = torch.operator "onnx.Unsqueeze"(%15955, %15961) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %15963 = torch.operator "onnx.Concat"(%15960, %15962) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15964 = torch.operator "onnx.Shape"(%15963) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %15965 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15966 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15967 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15968 = torch.operator "onnx.Slice"(%15964, %15966, %15967, %15965) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %15969 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15970 = torch.operator "onnx.Concat"(%15968, %15969) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %15971 = torch.operator "onnx.Reshape"(%15963, %15970) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %15972 = torch.operator "onnx.Cast"(%15919) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %15973 = torch.operator "onnx.Mul"(%15972, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15974 = torch.operator "onnx.Cast"(%15971) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %15975 = torch.operator "onnx.Mul"(%15974, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15976 = torch.operator "onnx.Add"(%15973, %15975) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %15977 = torch.operator "onnx.Cast"(%15976) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %15978 = torch.operator "onnx.Shape"(%15932) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15979 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15980 = torch.operator "onnx.Gather"(%15978, %15979) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15981 = torch.operator "onnx.Shape"(%15932) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15982 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15983 = torch.operator 
"onnx.Gather"(%15981, %15982) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15984 = torch.operator "onnx.Shape"(%15932) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %15985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %15986 = torch.operator "onnx.Gather"(%15984, %15985) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %15987 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20529_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15988 = torch.operator "onnx.Unsqueeze"(%15980, %15987) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15989 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20531_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15990 = torch.operator "onnx.Unsqueeze"(%15983, %15989) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15991 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20533_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15992 = torch.operator "onnx.Unsqueeze"(%15986, %15991) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %15993 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15994 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %15995 = torch.operator "onnx.Concat"(%15988, %15990, %15992, %15993, %15994) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %15996 = torch.operator "onnx.Reshape"(%15932, %15995) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %15997 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %15998:2 = torch.operator "onnx.Split"(%15996, %15997) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %15999 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16000 = torch.operator "onnx.Squeeze"(%15998#0, %15999) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16001 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16002 = torch.operator "onnx.Squeeze"(%15998#1, %16001) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16003 = torch.operator "onnx.Neg"(%16002) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16004 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16005 = torch.operator "onnx.Unsqueeze"(%16003, %16004) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16006 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.28_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16007 = torch.operator "onnx.Unsqueeze"(%16000, %16006) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16008 = torch.operator "onnx.Concat"(%16005, %16007) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16009 = torch.operator "onnx.Shape"(%16008) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16010 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16011 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16012 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16013 = torch.operator "onnx.Slice"(%16009, %16011, %16012, %16010) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16014 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16015 = torch.operator "onnx.Concat"(%16013, %16014) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16016 = torch.operator "onnx.Reshape"(%16008, %16015) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16017 = torch.operator "onnx.Cast"(%15932) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %16018 = torch.operator "onnx.Mul"(%16017, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16019 = torch.operator "onnx.Cast"(%16016) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16020 = torch.operator "onnx.Mul"(%16019, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16021 = torch.operator "onnx.Add"(%16018, %16020) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16022 = torch.operator "onnx.Cast"(%16021) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %16023 = torch.operator "onnx.Shape"(%15977) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %16024 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16025 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16026 = torch.operator "onnx.Slice"(%16023, %16024, %16025) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16027 = torch.operator "onnx.Cast"(%16026) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %16028 = torch.operator "onnx.Sqrt"(%16027) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16029 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %16030 = torch.operator "onnx.Cast"(%16028) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %16031 = torch.operator "onnx.Div"(%16029, %16030) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %16032 = torch.operator "onnx.Cast"(%16031) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %16033 = torch.operator "onnx.Transpose"(%16022) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16034 = torch.operator "onnx.Sqrt"(%16032) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16035 = torch.operator "onnx.Mul"(%15977, %16034) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %16036 = torch.operator "onnx.Sqrt"(%16032) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16037 = torch.operator "onnx.Mul"(%16033, %16036) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16038 = torch.operator "onnx.MatMul"(%16035, %16037) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16039 = torch.operator "onnx.Softmax"(%16038) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16040 = torch.operator "onnx.MatMul"(%16039, %15906) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %16041 = torch.operator "onnx.Transpose"(%16040) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %16042 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16043 = torch.operator "onnx.Mul"(%15879, %16042) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %16044 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20586_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16045 = torch.operator "onnx.Unsqueeze"(%15866, %16044) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16046 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16047 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20589_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16048 = torch.operator "onnx.Unsqueeze"(%16043, %16047) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16049 = torch.operator "onnx.Concat"(%16045, %16046, %16048) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16050 = torch.operator "onnx.Reshape"(%16041, %16049) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %16051 = torch.operator "onnx.Cast"(%16050) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %16052 = torch.operator "onnx.Concat"(%16051, %15863) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %16053 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.28_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16054 = torch.operator "onnx.Unsqueeze"(%15837, %16053) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16055 = torch.operator "onnx.MatMul"(%16052, %1113) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %16056 = torch.operator "onnx.Add"(%649, %16055) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16057 = torch.operator "onnx.Mul"(%16054, %16056) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16058 = torch.operator "onnx.Add"(%15819, %16057) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16059 = torch.operator "onnx.Gemm"(%1285, %655, %656) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %16060 = torch.operator "onnx.Shape"(%16059) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %16061 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16062 = torch.operator "onnx.Gather"(%16060, %16061) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16063 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16064 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16065 = torch.operator "onnx.Add"(%16062, %16064) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16066 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16067 = torch.operator 
"onnx.Div"(%16065, %16066) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16068 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16069 = torch.operator "onnx.Mul"(%16067, %16068) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16070 = torch.operator "onnx.Slice"(%16059, %16063, %16069, %16061) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %16071 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16072 = torch.operator "onnx.Mul"(%16067, %16071) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16073 = torch.operator "onnx.Slice"(%16059, %16069, %16072, %16061) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %16074 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16075 = torch.operator "onnx.Mul"(%16067, %16074) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16076 = torch.operator "onnx.Slice"(%16059, %16072, %16075, %16061) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.292Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.29/norm/norm/Constant_attr__value" : tensor<3072xbf16> %16077 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.292Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.292Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.29/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %16078 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.292Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %16079 = torch.operator "onnx.LayerNormalization"(%16058, %16077, %16078) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16080 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16081 = torch.operator "onnx.Unsqueeze"(%16073, %16080) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16082 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16083 = torch.operator "onnx.Add"(%16081, %16082) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %16084 = torch.operator "onnx.Mul"(%16079, %16083) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16085 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16086 = torch.operator "onnx.Unsqueeze"(%16070, %16085) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16087 = torch.operator "onnx.Add"(%16084, %16086) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16088 = torch.operator 
"onnx.MatMul"(%16087, %1114) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16089 = torch.operator "onnx.Add"(%657, %16088) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16090 = torch.operator "onnx.Mul"(%16089, %16089) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16091 = torch.operator "onnx.Mul"(%16089, %16090) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16092 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16093 = torch.operator "onnx.Mul"(%16092, %16091) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16094 = torch.operator "onnx.Add"(%16089, %16093) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16095 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16096 = torch.operator "onnx.Mul"(%16095, %16094) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16097 = torch.operator "onnx.Tanh"(%16096) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16098 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16099 = torch.operator "onnx.Add"(%16098, %16097) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16100 = torch.operator "onnx.Mul"(%16089, %16099) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16101 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16102 = torch.operator "onnx.Mul"(%16101, %16100) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16103 = torch.operator "onnx.Shape"(%16087) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %16104 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16105 = torch.operator "onnx.Gather"(%16103, %16104) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16106 = torch.operator "onnx.MatMul"(%16087, %1115) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16107 = torch.operator "onnx.Add"(%661, %16106) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16108 = torch.operator "onnx.MatMul"(%16087, %1116) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16109 = torch.operator "onnx.Add"(%662, %16108) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16110 = torch.operator "onnx.MatMul"(%16087, %1117) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16111 = torch.operator "onnx.Add"(%663, %16110) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16112 = torch.operator "onnx.Shape"(%16109) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %16113 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16114 = torch.operator "onnx.Gather"(%16112, %16113) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16115 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16116 = torch.operator "onnx.Div"(%16114, %16115) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16117 = torch.operator "onnx.Cast"(%16116) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16118 = torch.operator "onnx.Cast"(%16117) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16119 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20661_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16120 = torch.operator "onnx.Unsqueeze"(%16105, %16119) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16121 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16122 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16123 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20665_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16124 = torch.operator "onnx.Unsqueeze"(%16118, %16123) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16125 = torch.operator "onnx.Concat"(%16120, %16121, %16122, %16124) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16126 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20668_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16127 = torch.operator "onnx.Unsqueeze"(%16105, %16126) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16128 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16129 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20672_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16131 = torch.operator "onnx.Unsqueeze"(%16118, %16130) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16132 = torch.operator "onnx.Concat"(%16127, %16128, %16129, %16131) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16133 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20675_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16134 = torch.operator "onnx.Unsqueeze"(%16105, %16133) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16135 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16136 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16137 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20679_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16138 = torch.operator "onnx.Unsqueeze"(%16118, %16137) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16139 = torch.operator "onnx.Concat"(%16134, %16135, %16136, %16138) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16140 = torch.operator "onnx.Reshape"(%16107, %16125) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16141 = torch.operator "onnx.Transpose"(%16140) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16142 = torch.operator "onnx.Reshape"(%16109, %16132) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16143 = torch.operator "onnx.Transpose"(%16142) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16144 = torch.operator "onnx.Reshape"(%16111, %16139) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16145 = torch.operator "onnx.Transpose"(%16144) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16146 = torch.operator "onnx.Cast"(%16141) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16147 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16148 = torch.operator "onnx.Pow"(%16146, %16147) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16149 = torch.operator "onnx.ReduceMean"(%16148) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16150 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16151 = torch.operator "onnx.Add"(%16149, %16150) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16152 = torch.operator "onnx.Sqrt"(%16151) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16153 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16154 = torch.operator "onnx.Div"(%16153, %16152) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16155 = torch.operator "onnx.Cast"(%16141) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16156 = torch.operator "onnx.Mul"(%16155, %16154) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16157 = torch.operator "onnx.Cast"(%16156) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %16158 = torch.operator "onnx.Mul"(%16157, %659) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %16159 = torch.operator "onnx.Cast"(%16143) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16160 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16161 = torch.operator "onnx.Pow"(%16159, %16160) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16162 = torch.operator "onnx.ReduceMean"(%16161) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16163 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16164 = torch.operator "onnx.Add"(%16162, %16163) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16165 = torch.operator "onnx.Sqrt"(%16164) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16166 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16167 = torch.operator "onnx.Div"(%16166, %16165) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16168 = torch.operator "onnx.Cast"(%16143) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16169 = torch.operator "onnx.Mul"(%16168, %16167) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16170 = torch.operator "onnx.Cast"(%16169) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %16171 = torch.operator "onnx.Mul"(%16170, %660) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %16172 = torch.operator "onnx.Shape"(%16158) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16173 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16174 = torch.operator "onnx.Gather"(%16172, %16173) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16175 = torch.operator "onnx.Shape"(%16158) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16176 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16177 = torch.operator "onnx.Gather"(%16175, %16176) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16178 = torch.operator "onnx.Shape"(%16158) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16179 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16180 = torch.operator "onnx.Gather"(%16178, %16179) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16181 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20723_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16182 = torch.operator "onnx.Unsqueeze"(%16174, %16181) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16183 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20725_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16184 = torch.operator "onnx.Unsqueeze"(%16177, %16183) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16185 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20727_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16186 = torch.operator "onnx.Unsqueeze"(%16180, %16185) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16187 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.29_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16188 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16189 = torch.operator "onnx.Concat"(%16182, %16184, %16186, %16187, %16188) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %16190 = torch.operator "onnx.Reshape"(%16158, %16189) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16191 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %16192:2 = torch.operator "onnx.Split"(%16190, %16191) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %16193 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16194 = torch.operator "onnx.Squeeze"(%16192#0, %16193) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16195 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16196 = torch.operator "onnx.Squeeze"(%16192#1, %16195) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16197 = torch.operator "onnx.Neg"(%16196) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16198 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16199 = torch.operator "onnx.Unsqueeze"(%16197, %16198) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16200 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16201 = torch.operator "onnx.Unsqueeze"(%16194, %16200) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16202 = torch.operator "onnx.Concat"(%16199, %16201) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16203 = torch.operator "onnx.Shape"(%16202) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16204 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16205 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16206 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16207 = torch.operator "onnx.Slice"(%16203, %16205, %16206, %16204) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16208 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16209 = torch.operator "onnx.Concat"(%16207, %16208) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16210 = torch.operator "onnx.Reshape"(%16202, %16209) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16211 = torch.operator "onnx.Cast"(%16158) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %16212 = torch.operator "onnx.Mul"(%16211, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16213 = torch.operator "onnx.Cast"(%16210) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16214 = torch.operator "onnx.Mul"(%16213, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16215 = torch.operator "onnx.Add"(%16212, %16214) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16216 = torch.operator "onnx.Cast"(%16215) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %16217 = torch.operator "onnx.Shape"(%16171) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16218 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16219 = torch.operator "onnx.Gather"(%16217, %16218) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16220 = torch.operator "onnx.Shape"(%16171) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16221 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16222 = torch.operator 
"onnx.Gather"(%16220, %16221) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16223 = torch.operator "onnx.Shape"(%16171) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16225 = torch.operator "onnx.Gather"(%16223, %16224) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16226 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20768_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16227 = torch.operator "onnx.Unsqueeze"(%16219, %16226) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16228 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20770_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16229 = torch.operator "onnx.Unsqueeze"(%16222, %16228) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16230 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20772_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16231 = torch.operator "onnx.Unsqueeze"(%16225, %16230) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16233 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16234 = torch.operator "onnx.Concat"(%16227, %16229, %16231, %16232, %16233) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %16235 = torch.operator "onnx.Reshape"(%16171, %16234) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16236 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %16237:2 = torch.operator "onnx.Split"(%16235, %16236) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %16238 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16239 = torch.operator "onnx.Squeeze"(%16237#0, %16238) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16240 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16241 = torch.operator "onnx.Squeeze"(%16237#1, %16240) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16242 = torch.operator "onnx.Neg"(%16241) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16243 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16244 = torch.operator "onnx.Unsqueeze"(%16242, %16243) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16245 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.29_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16246 = torch.operator "onnx.Unsqueeze"(%16239, %16245) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16247 = torch.operator "onnx.Concat"(%16244, %16246) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16248 = torch.operator "onnx.Shape"(%16247) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16249 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16250 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16251 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16252 = torch.operator "onnx.Slice"(%16248, %16250, %16251, %16249) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16253 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16254 = torch.operator "onnx.Concat"(%16252, %16253) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16255 = torch.operator "onnx.Reshape"(%16247, %16254) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16256 = torch.operator "onnx.Cast"(%16171) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %16257 = torch.operator "onnx.Mul"(%16256, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16258 = torch.operator "onnx.Cast"(%16255) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16259 = torch.operator "onnx.Mul"(%16258, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16260 = torch.operator "onnx.Add"(%16257, %16259) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16261 = torch.operator "onnx.Cast"(%16260) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %16262 = torch.operator "onnx.Shape"(%16216) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %16263 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16264 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16265 = torch.operator "onnx.Slice"(%16262, %16263, %16264) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16266 = torch.operator "onnx.Cast"(%16265) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %16267 = torch.operator "onnx.Sqrt"(%16266) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16268 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %16269 = torch.operator "onnx.Cast"(%16267) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %16270 = torch.operator "onnx.Div"(%16268, %16269) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %16271 = torch.operator "onnx.Cast"(%16270) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %16272 = torch.operator "onnx.Transpose"(%16261) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16273 = torch.operator "onnx.Sqrt"(%16271) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16274 = torch.operator "onnx.Mul"(%16216, %16273) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %16275 = torch.operator "onnx.Sqrt"(%16271) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16276 = torch.operator "onnx.Mul"(%16272, %16275) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16277 = torch.operator "onnx.MatMul"(%16274, %16276) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16278 = torch.operator "onnx.Softmax"(%16277) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16279 = torch.operator "onnx.MatMul"(%16278, %16145) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %16280 = torch.operator "onnx.Transpose"(%16279) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %16281 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16282 = torch.operator "onnx.Mul"(%16118, %16281) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %16283 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20825_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16284 = torch.operator "onnx.Unsqueeze"(%16105, %16283) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16285 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16286 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20828_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16287 = torch.operator "onnx.Unsqueeze"(%16282, %16286) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16288 = torch.operator "onnx.Concat"(%16284, %16285, %16287) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16289 = torch.operator "onnx.Reshape"(%16280, %16288) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %16290 = torch.operator "onnx.Cast"(%16289) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %16291 = torch.operator "onnx.Concat"(%16290, %16102) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %16292 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.29_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16293 = torch.operator "onnx.Unsqueeze"(%16076, %16292) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16294 = torch.operator "onnx.MatMul"(%16291, %1118) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %16295 = torch.operator "onnx.Add"(%658, %16294) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16296 = torch.operator "onnx.Mul"(%16293, %16295) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16297 = torch.operator "onnx.Add"(%16058, %16296) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16298 = torch.operator "onnx.Gemm"(%1285, %664, %665) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %16299 = torch.operator "onnx.Shape"(%16298) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %16300 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16301 = torch.operator "onnx.Gather"(%16299, %16300) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16302 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16303 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16304 = torch.operator "onnx.Add"(%16301, %16303) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16305 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16306 = torch.operator 
"onnx.Div"(%16304, %16305) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16307 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16308 = torch.operator "onnx.Mul"(%16306, %16307) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16309 = torch.operator "onnx.Slice"(%16298, %16302, %16308, %16300) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %16310 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16311 = torch.operator "onnx.Mul"(%16306, %16310) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16312 = torch.operator "onnx.Slice"(%16298, %16308, %16311, %16300) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %16313 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16314 = torch.operator "onnx.Mul"(%16306, %16313) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16315 = torch.operator "onnx.Slice"(%16298, %16311, %16314, %16300) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.302Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.30/norm/norm/Constant_attr__value" : tensor<3072xbf16> %16316 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.302Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.302Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.30/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %16317 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.302Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %16318 = torch.operator "onnx.LayerNormalization"(%16297, %16316, %16317) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16319 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16320 = torch.operator "onnx.Unsqueeze"(%16312, %16319) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16322 = torch.operator "onnx.Add"(%16320, %16321) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %16323 = torch.operator "onnx.Mul"(%16318, %16322) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16324 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16325 = torch.operator "onnx.Unsqueeze"(%16309, %16324) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16326 = torch.operator "onnx.Add"(%16323, %16325) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16327 = torch.operator 
"onnx.MatMul"(%16326, %1119) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16328 = torch.operator "onnx.Add"(%666, %16327) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16329 = torch.operator "onnx.Mul"(%16328, %16328) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16330 = torch.operator "onnx.Mul"(%16328, %16329) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16331 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16332 = torch.operator "onnx.Mul"(%16331, %16330) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16333 = torch.operator "onnx.Add"(%16328, %16332) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16334 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16335 = torch.operator "onnx.Mul"(%16334, %16333) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16336 = torch.operator "onnx.Tanh"(%16335) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16337 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16338 = torch.operator "onnx.Add"(%16337, %16336) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16339 = torch.operator "onnx.Mul"(%16328, %16338) : 
// NOTE(review): annotations only; no op is changed. The text after each comment group first
// finishes the op left open on the preceding line, then starts the ops described.
// Block naming follows the dense_resource symbols ("__single_transformer_blocks.30_...").
//   %16340-%16341  scalar bf16 constant * %16339; %16341 is later the second Concat input at %16530.
//   %16342-%16344  Shape(%16326) + Gather(axis 0, scalar index) -> scalar si64 reused in the shape
//                  vectors below.
//   %16345-%16350  three MatMul+Add pairs on %16326 ([3072,3072] weights %1120/%1121/%1122, [3072]
//                  biases %670/%671/%672). Judging by their consumers (norm_q chain, norm_k chain,
//                  post-softmax MatMul) these look like the Q/K/V projections - TODO confirm.
//   %16351         Shape(%16348), feeding the Gather whose operands start the next line.
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16340 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16341 = torch.operator "onnx.Mul"(%16340, %16339) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16342 = torch.operator "onnx.Shape"(%16326) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %16343 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16344 = torch.operator "onnx.Gather"(%16342, %16343) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16345 = torch.operator "onnx.MatMul"(%16326, %1120) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16346 = torch.operator "onnx.Add"(%670, %16345) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16347 = torch.operator "onnx.MatMul"(%16326, %1121) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16348 = torch.operator "onnx.Add"(%671, %16347) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16349 = torch.operator "onnx.MatMul"(%16326, %1122) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16350 = torch.operator "onnx.Add"(%672, %16349) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16351 = torch.operator "onnx.Shape"(%16348) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %16352 = torch.operator 
//   %16353-%16355  Gather(axis 0) from Shape(%16348), then Div by a scalar si64 constant. Constant
//                  values live in dense_resource blobs that are not visible in this text.
//   %16356-%16357  two back-to-back Cast ops (to = 7) whose operand and result types are both si64.
//   %16358-%16364  first 4-element shape vector: Concat(axis 0) of Unsqueeze(%16344), two [1]
//                  constants and Unsqueeze(%16357).
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16353 = torch.operator "onnx.Gather"(%16351, %16352) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16354 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16355 = torch.operator "onnx.Div"(%16353, %16354) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16356 = torch.operator "onnx.Cast"(%16355) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16357 = torch.operator "onnx.Cast"(%16356) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16358 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20900_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16359 = torch.operator "onnx.Unsqueeze"(%16344, %16358) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16360 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16361 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16362 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20904_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16363 = torch.operator "onnx.Unsqueeze"(%16357, %16362) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16364 = torch.operator "onnx.Concat"(%16359, %16360, %16361, %16363) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
//   %16365-%16371  second 4-element shape vector, built the same way from %16344 and %16357.
//   %16372-%16375  start of the third shape vector (completed as %16378).
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16365 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20907_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16366 = torch.operator "onnx.Unsqueeze"(%16344, %16365) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16367 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16368 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16369 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20911_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16370 = torch.operator "onnx.Unsqueeze"(%16357, %16369) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16371 = torch.operator "onnx.Concat"(%16366, %16367, %16368, %16370) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16372 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20914_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16373 = torch.operator "onnx.Unsqueeze"(%16344, %16372) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16374 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16375 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16376 = 
//   %16376-%16378  finish the third shape vector.
//   %16379-%16384  Reshape %16346 / %16348 / %16350 with those vectors to rank 4 (all dims dynamic in
//                  the result types), each followed by Transpose perm [0, 2, 1, 3].
//   %16385-%16387  Cast(%16380) to f32 and Pow with a scalar f32 constant (norm_q_Constant): start of
//                  the norm_q chain.
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20918_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16377 = torch.operator "onnx.Unsqueeze"(%16357, %16376) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16378 = torch.operator "onnx.Concat"(%16373, %16374, %16375, %16377) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16379 = torch.operator "onnx.Reshape"(%16346, %16364) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16380 = torch.operator "onnx.Transpose"(%16379) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16381 = torch.operator "onnx.Reshape"(%16348, %16371) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16382 = torch.operator "onnx.Transpose"(%16381) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16383 = torch.operator "onnx.Reshape"(%16350, %16378) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16384 = torch.operator "onnx.Transpose"(%16383) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16385 = torch.operator "onnx.Cast"(%16380) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16386 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16387 = torch.operator "onnx.Pow"(%16385, %16386) : 
// NOTE(review): annotations only; no op is changed. The text after each comment group first
// finishes the op left open on the preceding line, then starts the ops described.
//   %16388-%16393  ReduceMean over the last axis (keepdims = 1), Add a scalar constant, Sqrt, then
//                  Div(scalar constant, sqrt). This matches an RMS-style normalisation if the Pow
//                  exponent is 2 and the Div numerator is 1 - the constants are opaque dense_resource
//                  blobs here, TODO confirm.
//   %16394         second Cast of %16380 to f32 (same operand and result types as %16385).
//   %16395-%16397  multiply by %16393, Cast to bf16 (to = 16), multiply by the [128] bf16 tensor %668.
//   %16398-%16400  same opening (Cast to f32, Pow) for %16382, using the norm_k_* constants.
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16388 = torch.operator "onnx.ReduceMean"(%16387) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16389 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16390 = torch.operator "onnx.Add"(%16388, %16389) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16391 = torch.operator "onnx.Sqrt"(%16390) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16392 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16393 = torch.operator "onnx.Div"(%16392, %16391) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16394 = torch.operator "onnx.Cast"(%16380) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16395 = torch.operator "onnx.Mul"(%16394, %16393) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16396 = torch.operator "onnx.Cast"(%16395) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %16397 = torch.operator "onnx.Mul"(%16396, %668) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %16398 = torch.operator "onnx.Cast"(%16382) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16399 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16400 = torch.operator "onnx.Pow"(%16398, %16399) : 
//   %16401-%16410  the same chain for %16382 (ReduceMean, Add, Sqrt, Div, second Cast, Mul, Cast to
//                  bf16); the final Mul uses the [128] bf16 tensor %669.
//   %16411-%16413  Shape(%16397) + Gather(axis 0): first of three scalar dims read back from %16397.
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16401 = torch.operator "onnx.ReduceMean"(%16400) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16402 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16403 = torch.operator "onnx.Add"(%16401, %16402) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16404 = torch.operator "onnx.Sqrt"(%16403) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16405 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16406 = torch.operator "onnx.Div"(%16405, %16404) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16407 = torch.operator "onnx.Cast"(%16382) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16408 = torch.operator "onnx.Mul"(%16407, %16406) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16409 = torch.operator "onnx.Cast"(%16408) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %16410 = torch.operator "onnx.Mul"(%16409, %669) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %16411 = torch.operator "onnx.Shape"(%16397) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16412 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16413 = torch.operator "onnx.Gather"(%16411, %16412) {torch.onnx.axis = 0 : si64} : 
// NOTE(review): annotations only; no op is changed. The text after each comment group first
// finishes the op left open on the preceding line, then starts the ops described.
//   %16414-%16419  two more Shape(%16397) + Gather(axis 0) pairs (Shape is recomputed each time).
//   %16420-%16425  Unsqueeze each of the three gathered scalars to a [1] tensor.
//   %16426         first of two trailing [1] constants (the second is %16427).
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16414 = torch.operator "onnx.Shape"(%16397) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16415 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16416 = torch.operator "onnx.Gather"(%16414, %16415) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16417 = torch.operator "onnx.Shape"(%16397) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16418 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16419 = torch.operator "onnx.Gather"(%16417, %16418) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16420 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20962_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16421 = torch.operator "onnx.Unsqueeze"(%16413, %16420) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16422 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20964_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16423 = torch.operator "onnx.Unsqueeze"(%16416, %16422) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16424 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_20966_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16425 = torch.operator "onnx.Unsqueeze"(%16419, %16424) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16426 = torch.operator "onnx.Constant"() {torch.onnx.value = 
//   %16427-%16429  Concat(axis 0) into a 5-element shape and Reshape %16397 to rank 5 with last dim 2.
//   %16430-%16435  Split along axis -1 into two [..., 1] parts and Squeeze each back to rank 4.
//   %16436         Neg of the second part.
dense_resource<__single_transformer_blocks.30_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16427 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16428 = torch.operator "onnx.Concat"(%16421, %16423, %16425, %16426, %16427) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %16429 = torch.operator "onnx.Reshape"(%16397, %16428) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16430 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %16431:2 = torch.operator "onnx.Split"(%16429, %16430) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %16432 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16433 = torch.operator "onnx.Squeeze"(%16431#0, %16432) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16434 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16435 = torch.operator "onnx.Squeeze"(%16431#1, %16434) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16436 = torch.operator "onnx.Neg"(%16435) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16437 = 
//   %16437-%16441  Unsqueeze both parts and Concat(axis -1) in the order (Neg(second), first), so each
//                  pair (a, b) becomes (-b, a). This looks like the "rotate" step of a rotary position
//                  embedding - presumably what the exporter emitted here; TODO confirm.
//   %16442-%16448  Shape(%16441), Slice (starts %16444, ends %16445, axes %16443) down to 3 entries,
//                  Concat with one [1] constant -> 4-element target shape.
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16438 = torch.operator "onnx.Unsqueeze"(%16436, %16437) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16439 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16440 = torch.operator "onnx.Unsqueeze"(%16433, %16439) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16441 = torch.operator "onnx.Concat"(%16438, %16440) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16442 = torch.operator "onnx.Shape"(%16441) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16443 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16444 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16445 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16446 = torch.operator "onnx.Slice"(%16442, %16444, %16445, %16443) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16447 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16448 = torch.operator "onnx.Concat"(%16446, %16447) 
//   %16449         Reshape %16441 back to rank 4.
//   %16450-%16455  Cast(%16397) to f32 * %1455, plus Cast(%16449) to f32 * %1456, Add, Cast to bf16.
//                  %1455 / %1456 are [1,1,4608,128] f32 values defined earlier in the file
//                  (presumably the cos / sin tables - TODO confirm).
//   %16456-%16460  the same Shape + Gather read-back, now on %16410.
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16449 = torch.operator "onnx.Reshape"(%16441, %16448) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16450 = torch.operator "onnx.Cast"(%16397) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %16451 = torch.operator "onnx.Mul"(%16450, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16452 = torch.operator "onnx.Cast"(%16449) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16453 = torch.operator "onnx.Mul"(%16452, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16454 = torch.operator "onnx.Add"(%16451, %16453) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16455 = torch.operator "onnx.Cast"(%16454) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %16456 = torch.operator "onnx.Shape"(%16410) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16457 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16458 = torch.operator "onnx.Gather"(%16456, %16457) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16459 = torch.operator "onnx.Shape"(%16410) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16460 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16461 = torch.operator 
//   %16461-%16464  remaining Gather results from Shape(%16410).
//   %16465-%16472  Unsqueeze the three scalars; two trailing [1] constants.
//   %16473         Concat(axis 0) into the 5-element shape (its type signature finishes on the next line).
"onnx.Gather"(%16459, %16460) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16462 = torch.operator "onnx.Shape"(%16410) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16463 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16464 = torch.operator "onnx.Gather"(%16462, %16463) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16465 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21007_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16466 = torch.operator "onnx.Unsqueeze"(%16458, %16465) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16467 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21009_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16468 = torch.operator "onnx.Unsqueeze"(%16461, %16467) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16469 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21011_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16470 = torch.operator "onnx.Unsqueeze"(%16464, %16469) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16471 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16472 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16473 = torch.operator "onnx.Concat"(%16466, %16468, %16470, %16471, %16472) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
// NOTE(review): annotations only; no op is changed. The text after each comment group first
// finishes the op left open on the preceding line, then starts the ops described.
//   %16474-%16481  Reshape %16410 to rank 5 (last dim 2), Split on axis -1, Squeeze both parts,
//                  Neg the second part.
//   %16482-%16483  Unsqueeze the negated part back to [..., 1].
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %16474 = torch.operator "onnx.Reshape"(%16410, %16473) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16475 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %16476:2 = torch.operator "onnx.Split"(%16474, %16475) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %16477 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16478 = torch.operator "onnx.Squeeze"(%16476#0, %16477) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16479 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16480 = torch.operator "onnx.Squeeze"(%16476#1, %16479) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16481 = torch.operator "onnx.Neg"(%16480) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16482 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16483 = torch.operator "onnx.Unsqueeze"(%16481, %16482) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16484 = torch.operator "onnx.Constant"() {torch.onnx.value = 
//   %16485-%16486  Unsqueeze the first part; Concat(axis -1) in the order (Neg(second), first).
//   %16487-%16494  Shape, Slice (starts %16489, ends %16490, axes %16488) to 3 entries, Concat with a
//                  [1] constant, Reshape %16486 back to rank 4.
//   %16495         Cast(%16410) to f32 (type signature finishes on the next line).
dense_resource<__single_transformer_blocks.30_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16485 = torch.operator "onnx.Unsqueeze"(%16478, %16484) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16486 = torch.operator "onnx.Concat"(%16483, %16485) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16487 = torch.operator "onnx.Shape"(%16486) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16488 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16489 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16490 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16491 = torch.operator "onnx.Slice"(%16487, %16489, %16490, %16488) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16492 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16493 = torch.operator "onnx.Concat"(%16491, %16492) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16494 = torch.operator "onnx.Reshape"(%16486, %16493) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16495 = torch.operator "onnx.Cast"(%16410) {torch.onnx.to = 1 : si64} : 
//   %16496-%16500  %16495 * %1455 + Cast(%16494) * %1456, then Cast to bf16 (same op pattern as
//                  %16450-%16455).
//   %16501-%16506  Shape(%16455), Slice to one element (starts %16502, ends %16503), Cast to bf16,
//                  Sqrt. Which dim is sliced depends on opaque constants; presumably the last
//                  (128) - TODO confirm.
//   %16507-%16508  [1] f32 constant and Cast of the Sqrt result to f32, feeding the Div that follows.
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %16496 = torch.operator "onnx.Mul"(%16495, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16497 = torch.operator "onnx.Cast"(%16494) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16498 = torch.operator "onnx.Mul"(%16497, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16499 = torch.operator "onnx.Add"(%16496, %16498) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16500 = torch.operator "onnx.Cast"(%16499) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %16501 = torch.operator "onnx.Shape"(%16455) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %16502 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16503 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16504 = torch.operator "onnx.Slice"(%16501, %16502, %16503) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16505 = torch.operator "onnx.Cast"(%16504) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %16506 = torch.operator "onnx.Sqrt"(%16505) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16507 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %16508 = torch.operator "onnx.Cast"(%16506) {torch.onnx.to = 1 : si64} : 
// NOTE(review): annotations only; no op is changed. The text after each comment group first
// finishes the op left open on the preceding line, then starts the ops described.
//   %16509-%16510  Div(%16507, %16508), Cast to bf16.
//   %16511         Transpose %16500 with perm [0, 1, 3, 2] -> [?,?,128,4608].
//   %16512-%16515  Sqrt(%16510) is computed twice (%16512, %16514) and multiplied into %16455 and
//                  into the transposed %16511 respectively.
//   %16516-%16517  MatMul -> [?,?,4608,4608], then Softmax(axis -1); both in bf16.
//   %16518-%16519  MatMul with %16384, Transpose perm [0, 2, 1, 3].
//   %16520-%16521  %16357 * scalar si64 constant (result type finishes on the next line).
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %16509 = torch.operator "onnx.Div"(%16507, %16508) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %16510 = torch.operator "onnx.Cast"(%16509) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %16511 = torch.operator "onnx.Transpose"(%16500) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16512 = torch.operator "onnx.Sqrt"(%16510) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16513 = torch.operator "onnx.Mul"(%16455, %16512) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %16514 = torch.operator "onnx.Sqrt"(%16510) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16515 = torch.operator "onnx.Mul"(%16511, %16514) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16516 = torch.operator "onnx.MatMul"(%16513, %16515) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16517 = torch.operator "onnx.Softmax"(%16516) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16518 = torch.operator "onnx.MatMul"(%16517, %16384) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %16519 = torch.operator "onnx.Transpose"(%16518) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %16520 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16521 = torch.operator "onnx.Mul"(%16357, %16520) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
//   %16522-%16527  3-element shape: Unsqueeze(%16344), a [1] constant, Unsqueeze(%16521).
//   %16528-%16529  Reshape %16519 to rank 3; the following Cast (to = 16) has bf16 operand and
//                  result types.
//   %16530         Concat(axis 2) of %16529 with %16341 ([?,4608,12288]).
//   %16531-%16532  Unsqueeze %16315 ([?,?]) to [?,1,?].
//   %16533         MatMul of %16530 with the [15360,3072] weight %1123 (result type on the next line).
!torch.vtensor<[],si64> %16522 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21064_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16523 = torch.operator "onnx.Unsqueeze"(%16344, %16522) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16524 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16525 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21067_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16526 = torch.operator "onnx.Unsqueeze"(%16521, %16525) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16527 = torch.operator "onnx.Concat"(%16523, %16524, %16526) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16528 = torch.operator "onnx.Reshape"(%16519, %16527) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %16529 = torch.operator "onnx.Cast"(%16528) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %16530 = torch.operator "onnx.Concat"(%16529, %16341) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %16531 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.30_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16532 = torch.operator "onnx.Unsqueeze"(%16315, %16531) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16533 = torch.operator "onnx.MatMul"(%16530, %1123) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
// NOTE(review): annotations only; no op is changed. The text after each comment group first
// finishes the op left open on the preceding line, then starts the ops described.
//   %16534-%16536  Add bias %667, Mul by %16532, Add to %16297 -> %16536 ([?,4608,3072]).
//   %16537         Gemm(%1285, %673, %674) with transB = 1, alpha = beta = 1:
//                  [1,3072] x [9216,3072]^T + [9216] -> [1,9216]. From here on the constant names
//                  switch to "__single_transformer_blocks.31_norm_*".
//   %16538-%16544  Shape(%16537), Gather with the [1] index constant %16539, Add a constant;
//                  %16544 is the divisor of the Div that starts next.
!torch.vtensor<[?,4608,3072],bf16> %16534 = torch.operator "onnx.Add"(%667, %16533) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16535 = torch.operator "onnx.Mul"(%16532, %16534) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16536 = torch.operator "onnx.Add"(%16297, %16535) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16537 = torch.operator "onnx.Gemm"(%1285, %673, %674) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %16538 = torch.operator "onnx.Shape"(%16537) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %16539 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16540 = torch.operator "onnx.Gather"(%16538, %16539) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16541 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16542 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16543 = torch.operator "onnx.Add"(%16540, %16542) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16544 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16545 = torch.operator 
//   %16545-%16554  Div, then three Mul/Slice pairs cutting %16537 into the consecutive ranges
//                  [%16541, %16547), [%16547, %16550), [%16550, %16553). %16539 is passed as the
//                  axes input of every Slice and is also the Gather index above. This looks like a
//                  3-way chunk of the 9216-wide Gemm output - constants are opaque, TODO confirm.
//   util.global.load + %16555  first [3072] bf16 global, later the scale operand of LayerNormalization.
"onnx.Div"(%16543, %16544) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16546 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16547 = torch.operator "onnx.Mul"(%16545, %16546) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16548 = torch.operator "onnx.Slice"(%16537, %16541, %16547, %16539) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %16549 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16550 = torch.operator "onnx.Mul"(%16545, %16549) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16551 = torch.operator "onnx.Slice"(%16537, %16547, %16550, %16539) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %16552 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16553 = torch.operator "onnx.Mul"(%16545, %16552) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16554 = torch.operator "onnx.Slice"(%16537, %16550, %16553, %16539) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.312Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.31/norm/norm/Constant_attr__value" : tensor<3072xbf16> %16555 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.312Fnorm2Fnorm2FConstant_attr__value : 
//   util.global.load + %16556  second [3072] bf16 global, the bias operand of LayerNormalization.
//   %16557         LayerNormalization(%16536, %16555, %16556), axis -1, epsilon 9.99999997E-7.
//   %16558-%16562  Unsqueeze(%16551) to [?,1,?], Add a scalar bf16 constant, multiply into %16557.
//   %16563-%16565  Unsqueeze(%16548) and Add -> %16565. Order of use: the second slice scales, the
//                  first slice shifts; the third slice (%16554) is not consumed in this part of
//                  the file.
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.312Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.31/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %16556 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.312Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %16557 = torch.operator "onnx.LayerNormalization"(%16536, %16555, %16556) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16558 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16559 = torch.operator "onnx.Unsqueeze"(%16551, %16558) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16560 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16561 = torch.operator "onnx.Add"(%16559, %16560) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %16562 = torch.operator "onnx.Mul"(%16557, %16561) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16563 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16564 = torch.operator "onnx.Unsqueeze"(%16548, %16563) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16565 = torch.operator "onnx.Add"(%16562, %16564) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16566 = torch.operator 
//   %16566-%16567  MatMul of %16565 with the [3072,12288] weight %1124, Add bias %675 -> x (%16567).
//   %16568-%16578  x*x, x*(x*x), const * that, x + ..., const * ..., Tanh, const + tanh, x * (...).
//                  This is the op sequence of a tanh-approximated GELU; the act_mlp_* constant
//                  values are opaque here - TODO confirm.
"onnx.MatMul"(%16565, %1124) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16567 = torch.operator "onnx.Add"(%675, %16566) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16568 = torch.operator "onnx.Mul"(%16567, %16567) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16569 = torch.operator "onnx.Mul"(%16567, %16568) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16570 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16571 = torch.operator "onnx.Mul"(%16570, %16569) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16572 = torch.operator "onnx.Add"(%16567, %16571) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16573 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16574 = torch.operator "onnx.Mul"(%16573, %16572) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16575 = torch.operator "onnx.Tanh"(%16574) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16576 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16577 = torch.operator "onnx.Add"(%16576, %16575) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16578 = torch.operator "onnx.Mul"(%16567, %16577) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16579 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %16580 = torch.operator "onnx.Mul"(%16579, %16578) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %16581 = torch.operator "onnx.Shape"(%16565) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %16582 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16583 = torch.operator "onnx.Gather"(%16581, %16582) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16584 = torch.operator "onnx.MatMul"(%16565, %1125) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16585 = torch.operator "onnx.Add"(%679, %16584) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16586 = torch.operator "onnx.MatMul"(%16565, %1126) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16587 = torch.operator "onnx.Add"(%680, %16586) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16588 = torch.operator "onnx.MatMul"(%16565, %1127) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16589 = torch.operator "onnx.Add"(%681, %16588) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16590 = torch.operator "onnx.Shape"(%16587) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %16591 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16592 = torch.operator "onnx.Gather"(%16590, %16591) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16593 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16594 = torch.operator "onnx.Div"(%16592, %16593) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16595 = torch.operator "onnx.Cast"(%16594) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16596 = torch.operator "onnx.Cast"(%16595) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16597 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21139_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16598 = torch.operator "onnx.Unsqueeze"(%16583, %16597) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16599 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16600 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16601 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21143_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16602 = torch.operator "onnx.Unsqueeze"(%16596, %16601) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16603 = torch.operator "onnx.Concat"(%16598, %16599, %16600, %16602) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16604 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21146_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16605 = torch.operator "onnx.Unsqueeze"(%16583, %16604) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16606 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16607 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16608 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21150_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16609 = torch.operator "onnx.Unsqueeze"(%16596, %16608) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16610 = torch.operator "onnx.Concat"(%16605, %16606, %16607, %16609) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16611 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21153_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16612 = torch.operator "onnx.Unsqueeze"(%16583, %16611) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16614 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16615 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21157_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16616 = torch.operator "onnx.Unsqueeze"(%16596, %16615) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16617 = torch.operator "onnx.Concat"(%16612, %16613, %16614, %16616) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16618 = torch.operator "onnx.Reshape"(%16585, %16603) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16619 = torch.operator "onnx.Transpose"(%16618) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16620 = torch.operator "onnx.Reshape"(%16587, %16610) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16621 = torch.operator "onnx.Transpose"(%16620) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16622 = torch.operator "onnx.Reshape"(%16589, %16617) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16623 = torch.operator "onnx.Transpose"(%16622) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16624 = torch.operator "onnx.Cast"(%16619) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16625 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16626 = torch.operator "onnx.Pow"(%16624, %16625) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16627 = torch.operator "onnx.ReduceMean"(%16626) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16628 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16629 = torch.operator "onnx.Add"(%16627, %16628) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16630 = torch.operator "onnx.Sqrt"(%16629) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16631 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16632 = torch.operator "onnx.Div"(%16631, %16630) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16633 = torch.operator "onnx.Cast"(%16619) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16634 = torch.operator "onnx.Mul"(%16633, %16632) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16635 = torch.operator "onnx.Cast"(%16634) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %16636 = torch.operator "onnx.Mul"(%16635, %677) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %16637 = torch.operator "onnx.Cast"(%16621) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16638 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16639 = torch.operator "onnx.Pow"(%16637, %16638) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16640 = torch.operator "onnx.ReduceMean"(%16639) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16641 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16642 = torch.operator "onnx.Add"(%16640, %16641) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16643 = torch.operator "onnx.Sqrt"(%16642) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16644 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %16645 = torch.operator "onnx.Div"(%16644, %16643) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %16646 = torch.operator "onnx.Cast"(%16621) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16647 = torch.operator "onnx.Mul"(%16646, %16645) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %16648 = torch.operator "onnx.Cast"(%16647) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %16649 = torch.operator "onnx.Mul"(%16648, %678) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %16650 = torch.operator "onnx.Shape"(%16636) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16651 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16652 = torch.operator "onnx.Gather"(%16650, %16651) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16653 = torch.operator "onnx.Shape"(%16636) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16654 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16655 = torch.operator "onnx.Gather"(%16653, %16654) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16656 = torch.operator "onnx.Shape"(%16636) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16657 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16658 = torch.operator "onnx.Gather"(%16656, %16657) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16659 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21201_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16660 = torch.operator "onnx.Unsqueeze"(%16652, %16659) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16661 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21203_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16662 = torch.operator "onnx.Unsqueeze"(%16655, %16661) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16663 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21205_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16664 = torch.operator "onnx.Unsqueeze"(%16658, %16663) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16665 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.31_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16667 = torch.operator "onnx.Concat"(%16660, %16662, %16664, %16665, %16666) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %16668 = torch.operator "onnx.Reshape"(%16636, %16667) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16669 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %16670:2 = torch.operator "onnx.Split"(%16668, %16669) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %16671 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16672 = torch.operator "onnx.Squeeze"(%16670#0, %16671) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16673 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16674 = torch.operator "onnx.Squeeze"(%16670#1, %16673) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16675 = torch.operator "onnx.Neg"(%16674) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16676 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16677 = torch.operator "onnx.Unsqueeze"(%16675, %16676) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16678 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16679 = torch.operator "onnx.Unsqueeze"(%16672, %16678) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16680 = torch.operator "onnx.Concat"(%16677, %16679) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16681 = torch.operator "onnx.Shape"(%16680) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16682 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16683 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16684 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16685 = torch.operator "onnx.Slice"(%16681, %16683, %16684, %16682) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16686 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16687 = torch.operator "onnx.Concat"(%16685, %16686) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16688 = torch.operator "onnx.Reshape"(%16680, %16687) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16689 = torch.operator "onnx.Cast"(%16636) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %16690 = torch.operator "onnx.Mul"(%16689, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16691 = torch.operator "onnx.Cast"(%16688) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16692 = torch.operator "onnx.Mul"(%16691, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16693 = torch.operator "onnx.Add"(%16690, %16692) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16694 = torch.operator "onnx.Cast"(%16693) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %16695 = torch.operator "onnx.Shape"(%16649) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16696 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16697 = torch.operator "onnx.Gather"(%16695, %16696) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16698 = torch.operator "onnx.Shape"(%16649) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16699 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16700 = torch.operator 
"onnx.Gather"(%16698, %16699) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16701 = torch.operator "onnx.Shape"(%16649) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %16702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16703 = torch.operator "onnx.Gather"(%16701, %16702) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %16704 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21246_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16705 = torch.operator "onnx.Unsqueeze"(%16697, %16704) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16706 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21248_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16707 = torch.operator "onnx.Unsqueeze"(%16700, %16706) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16708 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21250_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16709 = torch.operator "onnx.Unsqueeze"(%16703, %16708) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16710 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16711 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16712 = torch.operator "onnx.Concat"(%16705, %16707, %16709, %16710, %16711) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %16713 = torch.operator "onnx.Reshape"(%16649, %16712) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16714 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %16715:2 = torch.operator "onnx.Split"(%16713, %16714) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %16716 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16717 = torch.operator "onnx.Squeeze"(%16715#0, %16716) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16718 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16719 = torch.operator "onnx.Squeeze"(%16715#1, %16718) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16720 = torch.operator "onnx.Neg"(%16719) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16721 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16722 = torch.operator "onnx.Unsqueeze"(%16720, %16721) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16723 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.31_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16724 = torch.operator "onnx.Unsqueeze"(%16717, %16723) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16725 = torch.operator "onnx.Concat"(%16722, %16724) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16726 = torch.operator "onnx.Shape"(%16725) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16727 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16728 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16729 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16730 = torch.operator "onnx.Slice"(%16726, %16728, %16729, %16727) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16731 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16732 = torch.operator "onnx.Concat"(%16730, %16731) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16733 = torch.operator "onnx.Reshape"(%16725, %16732) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16734 = torch.operator "onnx.Cast"(%16649) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %16735 = torch.operator "onnx.Mul"(%16734, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16736 = torch.operator "onnx.Cast"(%16733) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16737 = torch.operator "onnx.Mul"(%16736, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16738 = torch.operator "onnx.Add"(%16735, %16737) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16739 = torch.operator "onnx.Cast"(%16738) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %16740 = torch.operator "onnx.Shape"(%16694) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %16741 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16742 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16743 = torch.operator "onnx.Slice"(%16740, %16741, %16742) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16744 = torch.operator "onnx.Cast"(%16743) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %16745 = torch.operator "onnx.Sqrt"(%16744) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16746 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %16747 = torch.operator "onnx.Cast"(%16745) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %16748 = torch.operator "onnx.Div"(%16746, %16747) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %16749 = torch.operator "onnx.Cast"(%16748) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %16750 = torch.operator "onnx.Transpose"(%16739) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16751 = torch.operator "onnx.Sqrt"(%16749) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16752 = torch.operator "onnx.Mul"(%16694, %16751) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %16753 = torch.operator "onnx.Sqrt"(%16749) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16754 = torch.operator "onnx.Mul"(%16750, %16753) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16755 = torch.operator "onnx.MatMul"(%16752, %16754) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16756 = torch.operator "onnx.Softmax"(%16755) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16757 = torch.operator "onnx.MatMul"(%16756, %16623) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %16758 = torch.operator "onnx.Transpose"(%16757) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %16759 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16760 = torch.operator "onnx.Mul"(%16596, %16759) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %16761 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21303_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16762 = torch.operator "onnx.Unsqueeze"(%16583, %16761) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16763 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16764 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21306_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16765 = torch.operator "onnx.Unsqueeze"(%16760, %16764) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16766 = torch.operator "onnx.Concat"(%16762, %16763, %16765) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16767 = torch.operator "onnx.Reshape"(%16758, %16766) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %16768 = torch.operator "onnx.Cast"(%16767) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %16769 = torch.operator "onnx.Concat"(%16768, %16580) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %16770 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.31_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16771 = torch.operator "onnx.Unsqueeze"(%16554, %16770) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %16772 = torch.operator "onnx.MatMul"(%16769, %1128) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %16773 = torch.operator "onnx.Add"(%676, %16772) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16774 = torch.operator "onnx.Mul"(%16771, %16773) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16775 = torch.operator "onnx.Add"(%16536, %16774) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %16776 = torch.operator "onnx.Gemm"(%1285, %682, %683) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %16777 = torch.operator "onnx.Shape"(%16776) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %16778 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16779 = torch.operator "onnx.Gather"(%16777, %16778) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16780 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16781 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16782 = torch.operator "onnx.Add"(%16779, %16781) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16783 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16784 = torch.operator 
"onnx.Div"(%16782, %16783) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16785 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16786 = torch.operator "onnx.Mul"(%16784, %16785) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16787 = torch.operator "onnx.Slice"(%16776, %16780, %16786, %16778) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %16788 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16789 = torch.operator "onnx.Mul"(%16784, %16788) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16790 = torch.operator "onnx.Slice"(%16776, %16786, %16789, %16778) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %16791 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16792 = torch.operator "onnx.Mul"(%16784, %16791) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16793 = torch.operator "onnx.Slice"(%16776, %16789, %16792, %16778) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.322Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.32/norm/norm/Constant_attr__value" : tensor<3072xbf16> %16794 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.322Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// The fragment above completes the torch_c.from_builtin_tensor op that defines %16794.
//
// ---- single_transformer_blocks.32 / norm --------------------------------------------
// LayerNormalization of %16775 over the last axis, using %16794 and %16795 (both loaded
// from util.globals) as the scale/bias operands.
%_2Fsingle_transformer_blocks.322Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.32/norm/norm/Constant_1_attr__value" : tensor<3072xbf16>
%16795 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.322Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
%16796 = torch.operator "onnx.LayerNormalization"(%16775, %16794, %16795) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// Modulation: %16804 = %16796 * (unsqueeze(%16790) + c) + unsqueeze(%16787).
// %16790 and %16787 are slices of the Gemm result %16776 defined on earlier lines.
%16797 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16798 = torch.operator "onnx.Unsqueeze"(%16790, %16797) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
// Rank-0 bf16 constant; the attribute type is written out as tensor<bf16> to agree with
// the op's result type. Its value lives in the dense resource (presumably 1.0 - TODO confirm).
%16799 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%16800 = torch.operator "onnx.Add"(%16798, %16799) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%16801 = torch.operator "onnx.Mul"(%16796, %16800) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%16802 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16803 = torch.operator "onnx.Unsqueeze"(%16787, %16802) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%16804 = torch.operator "onnx.Add"(%16801, %16803) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
//
// ---- single_transformer_blocks.32 / MLP projection + activation ------------------------
// Linear 3072 -> 12288: MatMul with %1129, then add the [12288] operand %684.
%16805 = torch.operator "onnx.MatMul"(%16804, %1129) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%16806 = torch.operator "onnx.Add"(%684, %16805) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// Activation on x = %16806, structurally c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))), i.e. the
// shape of a tanh-approximated GELU. c0..c3 are rank-0 bf16 constants whose values are
// stored in dense resources and are not visible here. Their attribute types are written
// out as tensor<bf16> to agree with each op's result type.
%16807 = torch.operator "onnx.Mul"(%16806, %16806) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%16808 = torch.operator "onnx.Mul"(%16806, %16807) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%16809 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%16810 = torch.operator "onnx.Mul"(%16809, %16808) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%16811 = torch.operator "onnx.Add"(%16806, %16810) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%16812 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%16813 = torch.operator "onnx.Mul"(%16812, %16811) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%16814 = torch.operator "onnx.Tanh"(%16813) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%16815 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%16816 = torch.operator "onnx.Add"(%16815, %16814) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// The op below continues on the next physical line (its type signature follows there).
%16817 = torch.operator "onnx.Mul"(%16806, %16816) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// The fragment above is the type signature of the onnx.Mul that defines %16817.
//
// Final scalar multiply of the MLP activation. The rank-0 bf16 constant's attribute type is
// written out as tensor<bf16> to agree with the op's result type.
%16818 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%16819 = torch.operator "onnx.Mul"(%16818, %16817) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
//
// ---- single_transformer_blocks.32 / attn: projections -----------------------------------
// %16822 = one dimension of %16804's shape. The gather index is a rank-0 si64 constant
// stored in a dense resource (presumably index 0, the batch dim - TODO confirm).
%16820 = torch.operator "onnx.Shape"(%16804) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%16821 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%16822 = torch.operator "onnx.Gather"(%16820, %16821) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three 3072 -> 3072 linear projections of %16804 (MatMul, then add a [3072] operand).
// Downstream, %16824 feeds the norm_q ops, %16826 the norm_k ops, and %16828 (after
// reshape/transpose) is the right-hand operand of the MatMul that follows the Softmax.
%16823 = torch.operator "onnx.MatMul"(%16804, %1130) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%16824 = torch.operator "onnx.Add"(%688, %16823) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%16825 = torch.operator "onnx.MatMul"(%16804, %1131) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%16826 = torch.operator "onnx.Add"(%689, %16825) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%16827 = torch.operator "onnx.MatMul"(%16804, %1132) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%16828 = torch.operator "onnx.Add"(%690, %16827) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
//
// %16833 = (one dimension of %16826's shape) / (scalar constant). Both the gather index and
// the divisor are rank-0 si64 constants stored in dense resources; their attribute types are
// written out as tensor<si64> to agree with the result types.
// NOTE(review): presumably this is hidden_size / num_heads, i.e. the per-head width - confirm.
%16829 = torch.operator "onnx.Shape"(%16826) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%16830 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%16831 = torch.operator "onnx.Gather"(%16829, %16830) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%16832 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%16833 = torch.operator "onnx.Div"(%16831, %16832) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Two si64 -> si64 casts (to = 7). Both are type-preserving, but they are kept because
// %16835 is referenced by ops on later lines.
%16834 = torch.operator "onnx.Cast"(%16833) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%16835 = torch.operator "onnx.Cast"(%16834) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Pieces of the 4-element reshape target assembled by the Concat below:
// [ %16822, const, const, %16835 ], each as a 1-element si64 vector.
%16836 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21378_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16837 = torch.operator "onnx.Unsqueeze"(%16822, %16836) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%16838 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16839 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16840 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21382_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16841 = torch.operator "onnx.Unsqueeze"(%16835, %16840) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// The op below continues on the next physical line (rest of its type signature).
%16842 = torch.operator "onnx.Concat"(%16837, %16838, %16839, %16841) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16843 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21385_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16844 = torch.operator "onnx.Unsqueeze"(%16822, %16843) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16845 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16846 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16847 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21389_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16848 = torch.operator "onnx.Unsqueeze"(%16835, %16847) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16849 = torch.operator "onnx.Concat"(%16844, %16845, %16846, %16848) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16850 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21392_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16851 = torch.operator "onnx.Unsqueeze"(%16822, %16850) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16852 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16853 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16854 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21396_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The fragment above is the right-hand side of the op that defines %16854.
//
// Third 4-element reshape target: [ %16851, %16852, %16853, unsqueeze(%16835) ].
%16855 = torch.operator "onnx.Unsqueeze"(%16835, %16854) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%16856 = torch.operator "onnx.Concat"(%16851, %16852, %16853, %16855) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
//
// Each of the three projections is reshaped to rank 4 with its own shape vector and then
// has axes 1 and 2 swapped (perm = [0, 2, 1, 3]).
%16857 = torch.operator "onnx.Reshape"(%16824, %16842) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%16858 = torch.operator "onnx.Transpose"(%16857) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%16859 = torch.operator "onnx.Reshape"(%16826, %16849) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%16860 = torch.operator "onnx.Transpose"(%16859) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%16861 = torch.operator "onnx.Reshape"(%16828, %16856) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%16862 = torch.operator "onnx.Transpose"(%16861) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
//
// ---- attn.norm_q (start) ----------------------------------------------------------------
// Upcast %16858 to f32 (to = 1) and raise it to a scalar power. The exponent is a rank-0 f32
// constant stored in a dense resource (presumably 2.0 - TODO confirm); its attribute type is
// written out as tensor<f32> to agree with the op's result type.
%16863 = torch.operator "onnx.Cast"(%16858) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%16864 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// The op below continues on the next physical line (its type signature follows there).
%16865 = torch.operator "onnx.Pow"(%16863, %16864) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
// The fragment above is the type signature of the onnx.Pow that defines %16865.
//
// ---- attn.norm_q (remainder) ------------------------------------------------------------
// Computed in f32: %16871 = c2 / sqrt(mean(%16865, last axis, keepdims) + c1).
// c1 and c2 are rank-0 f32 constants stored in dense resources (presumably an epsilon and
// 1.0 - TODO confirm); their attribute types are written out as tensor<f32> to agree with
// the result types.
%16866 = torch.operator "onnx.ReduceMean"(%16865) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%16867 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%16868 = torch.operator "onnx.Add"(%16866, %16867) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%16869 = torch.operator "onnx.Sqrt"(%16868) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%16870 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%16871 = torch.operator "onnx.Div"(%16870, %16869) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Second bf16 -> f32 cast of the same value %16858 (the first one is %16863). The upcast
// input is scaled by %16871, cast back to bf16 (to = 16), then multiplied by the [128]
// operand %686.
%16872 = torch.operator "onnx.Cast"(%16858) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%16873 = torch.operator "onnx.Mul"(%16872, %16871) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%16874 = torch.operator "onnx.Cast"(%16873) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%16875 = torch.operator "onnx.Mul"(%16874, %686) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
//
// ---- attn.norm_k (start) ----------------------------------------------------------------
// Same op sequence applied to %16860: upcast to f32, then raise to a scalar power (rank-0
// f32 constant in a dense resource; attribute type written out as tensor<f32>).
%16876 = torch.operator "onnx.Cast"(%16860) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%16877 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
// The op below continues on the next physical line (its type signature follows there).
%16878 = torch.operator "onnx.Pow"(%16876, %16877) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
// The fragment above is the type signature of the onnx.Pow that defines %16878.
//
// ---- attn.norm_k (remainder) ------------------------------------------------------------
// Computed in f32: %16884 = c2 / sqrt(mean(%16878, last axis, keepdims) + c1).
// c1 and c2 are rank-0 f32 constants stored in dense resources (presumably an epsilon and
// 1.0 - TODO confirm); their attribute types are written out as tensor<f32> to agree with
// the result types.
%16879 = torch.operator "onnx.ReduceMean"(%16878) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%16880 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%16881 = torch.operator "onnx.Add"(%16879, %16880) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%16882 = torch.operator "onnx.Sqrt"(%16881) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%16883 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%16884 = torch.operator "onnx.Div"(%16883, %16882) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
// Second bf16 -> f32 cast of %16860 (the first is %16876 on the previous line). The upcast
// input is scaled by %16884, cast back to bf16 (to = 16), then multiplied by the [128]
// operand %687.
%16885 = torch.operator "onnx.Cast"(%16860) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%16886 = torch.operator "onnx.Mul"(%16885, %16884) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%16887 = torch.operator "onnx.Cast"(%16886) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%16888 = torch.operator "onnx.Mul"(%16887, %687) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
//
// First of three Shape/Gather pairs on %16875. The gather index is a rank-0 si64 constant
// stored in a dense resource; its attribute type is written out as tensor<si64> to agree
// with the op's result type.
%16889 = torch.operator "onnx.Shape"(%16875) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%16890 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
// The op below continues on the next physical line (its type signature follows there).
%16891 = torch.operator "onnx.Gather"(%16889, %16890) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// The fragment above is the type signature of the onnx.Gather that defines %16891.
//
// Two more Shape/Gather pairs on %16875. onnx.Shape is re-evaluated on the same value each
// time. The gather indices are rank-0 si64 constants stored in dense resources (which
// dimensions they select is not visible here); their attribute types are written out as
// tensor<si64> to agree with the result types.
%16892 = torch.operator "onnx.Shape"(%16875) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%16893 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%16894 = torch.operator "onnx.Gather"(%16892, %16893) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%16895 = torch.operator "onnx.Shape"(%16875) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%16896 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%16897 = torch.operator "onnx.Gather"(%16895, %16896) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Lift the three gathered scalars (%16891, %16894, %16897) to 1-element si64 vectors; they
// become the leading entries of the 5-element reshape target concatenated on the next line.
%16898 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21440_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16899 = torch.operator "onnx.Unsqueeze"(%16891, %16898) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%16900 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21442_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16901 = torch.operator "onnx.Unsqueeze"(%16894, %16900) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%16902 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21444_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16903 = torch.operator "onnx.Unsqueeze"(%16897, %16902) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
// The op below continues on the next physical line (its dense_resource attribute follows there).
%16904 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.32_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16905 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16906 = torch.operator "onnx.Concat"(%16899, %16901, %16903, %16904, %16905) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %16907 = torch.operator "onnx.Reshape"(%16875, %16906) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16908 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %16909:2 = torch.operator "onnx.Split"(%16907, %16908) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %16910 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16911 = torch.operator "onnx.Squeeze"(%16909#0, %16910) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16912 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16913 = torch.operator "onnx.Squeeze"(%16909#1, %16912) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16914 = torch.operator "onnx.Neg"(%16913) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16915 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16916 = torch.operator "onnx.Unsqueeze"(%16914, %16915) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16917 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16918 = torch.operator "onnx.Unsqueeze"(%16911, %16917) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16919 = torch.operator "onnx.Concat"(%16916, %16918) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16920 = torch.operator "onnx.Shape"(%16919) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16921 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16922 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16923 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16924 = torch.operator "onnx.Slice"(%16920, %16922, %16923, %16921) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16925 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16926 = torch.operator "onnx.Concat"(%16924, %16925) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// The fragment above completes the onnx.Concat that defines the reshape target %16926.
//
// Collapse the 5-D pair tensor %16919 (built on the preceding lines by concatenating the
// negated second split half with the first split half of %16875) back to rank 4.
%16927 = torch.operator "onnx.Reshape"(%16919, %16926) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// Computed in f32: %16932 = %16875 * %1455 + %16927 * %1456, then cast back to bf16.
// %1455 and %1456 are [1,1,4608,128] f32 tensors defined outside this chunk
// (NOTE(review): presumably the rotary cos/sin tables - confirm).
%16928 = torch.operator "onnx.Cast"(%16875) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%16929 = torch.operator "onnx.Mul"(%16928, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%16930 = torch.operator "onnx.Cast"(%16927) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%16931 = torch.operator "onnx.Mul"(%16930, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%16932 = torch.operator "onnx.Add"(%16929, %16931) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%16933 = torch.operator "onnx.Cast"(%16932) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
//
// Three Shape/Gather pairs on %16888 (the norm_k output). onnx.Shape is re-evaluated on the
// same value each time. The gather indices are rank-0 si64 constants stored in dense
// resources (which dimensions they select is not visible here); their attribute types are
// written out as tensor<si64> to agree with the result types.
%16934 = torch.operator "onnx.Shape"(%16888) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%16935 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%16936 = torch.operator "onnx.Gather"(%16934, %16935) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%16937 = torch.operator "onnx.Shape"(%16888) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%16938 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%16939 = torch.operator "onnx.Gather"(%16937, %16938) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%16940 = torch.operator "onnx.Shape"(%16888) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%16941 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%16942 = torch.operator "onnx.Gather"(%16940, %16941) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Lift the gathered scalars to 1-element vectors and append two constant entries to form
// the 5-element reshape target for %16888.
%16943 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21485_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16944 = torch.operator "onnx.Unsqueeze"(%16936, %16943) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%16945 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21487_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16946 = torch.operator "onnx.Unsqueeze"(%16939, %16945) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%16947 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21489_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16948 = torch.operator "onnx.Unsqueeze"(%16942, %16947) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%16949 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%16950 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// The op below continues on the next physical line (rest of its type signature).
%16951 = torch.operator "onnx.Concat"(%16944, %16946, %16948, %16949, %16950) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %16952 = torch.operator "onnx.Reshape"(%16888, %16951) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16953 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %16954:2 = torch.operator "onnx.Split"(%16952, %16953) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %16955 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16956 = torch.operator "onnx.Squeeze"(%16954#0, %16955) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16957 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16958 = torch.operator "onnx.Squeeze"(%16954#1, %16957) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16959 = torch.operator "onnx.Neg"(%16958) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %16960 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16961 = torch.operator "onnx.Unsqueeze"(%16959, %16960) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16962 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.32_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16963 = torch.operator "onnx.Unsqueeze"(%16956, %16962) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %16964 = torch.operator "onnx.Concat"(%16961, %16963) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %16965 = torch.operator "onnx.Shape"(%16964) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %16966 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16967 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16968 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16969 = torch.operator "onnx.Slice"(%16965, %16967, %16968, %16966) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %16970 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16971 = torch.operator "onnx.Concat"(%16969, %16970) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %16972 = torch.operator "onnx.Reshape"(%16964, %16971) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %16973 = torch.operator "onnx.Cast"(%16888) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %16974 = torch.operator "onnx.Mul"(%16973, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16975 = torch.operator "onnx.Cast"(%16972) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %16976 = torch.operator "onnx.Mul"(%16975, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16977 = torch.operator "onnx.Add"(%16974, %16976) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %16978 = torch.operator "onnx.Cast"(%16977) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %16979 = torch.operator "onnx.Shape"(%16933) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %16980 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16981 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %16982 = torch.operator "onnx.Slice"(%16979, %16980, %16981) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %16983 = torch.operator "onnx.Cast"(%16982) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %16984 = torch.operator "onnx.Sqrt"(%16983) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16985 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %16986 = torch.operator "onnx.Cast"(%16984) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %16987 = torch.operator "onnx.Div"(%16985, %16986) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %16988 = torch.operator "onnx.Cast"(%16987) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %16989 = torch.operator "onnx.Transpose"(%16978) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16990 = torch.operator "onnx.Sqrt"(%16988) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16991 = torch.operator "onnx.Mul"(%16933, %16990) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %16992 = torch.operator "onnx.Sqrt"(%16988) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %16993 = torch.operator "onnx.Mul"(%16989, %16992) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %16994 = torch.operator "onnx.MatMul"(%16991, %16993) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16995 = torch.operator "onnx.Softmax"(%16994) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %16996 = torch.operator "onnx.MatMul"(%16995, %16862) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %16997 = torch.operator "onnx.Transpose"(%16996) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %16998 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %16999 = torch.operator "onnx.Mul"(%16835, %16998) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %17000 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21542_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17001 = torch.operator "onnx.Unsqueeze"(%16822, %17000) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17002 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17003 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21545_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17004 = torch.operator "onnx.Unsqueeze"(%16999, %17003) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17005 = torch.operator "onnx.Concat"(%17001, %17002, %17004) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17006 = torch.operator "onnx.Reshape"(%16997, %17005) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %17007 = torch.operator "onnx.Cast"(%17006) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %17008 = torch.operator "onnx.Concat"(%17007, %16819) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %17009 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.32_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17010 = torch.operator "onnx.Unsqueeze"(%16793, %17009) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17011 = torch.operator "onnx.MatMul"(%17008, %1133) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %17012 = torch.operator "onnx.Add"(%685, %17011) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17013 = torch.operator "onnx.Mul"(%17010, %17012) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17014 = torch.operator "onnx.Add"(%16775, %17013) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17015 = torch.operator "onnx.Gemm"(%1285, %691, %692) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %17016 = torch.operator "onnx.Shape"(%17015) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %17017 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17018 = torch.operator "onnx.Gather"(%17016, %17017) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17019 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17020 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17021 = torch.operator "onnx.Add"(%17018, %17020) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17022 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17023 = torch.operator 
"onnx.Div"(%17021, %17022) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17024 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17025 = torch.operator "onnx.Mul"(%17023, %17024) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17026 = torch.operator "onnx.Slice"(%17015, %17019, %17025, %17017) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17027 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17028 = torch.operator "onnx.Mul"(%17023, %17027) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17029 = torch.operator "onnx.Slice"(%17015, %17025, %17028, %17017) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17030 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17031 = torch.operator "onnx.Mul"(%17023, %17030) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17032 = torch.operator "onnx.Slice"(%17015, %17028, %17031, %17017) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.332Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.33/norm/norm/Constant_attr__value" : tensor<3072xbf16> %17033 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.332Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.332Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.33/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %17034 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.332Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %17035 = torch.operator "onnx.LayerNormalization"(%17014, %17033, %17034) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17036 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17037 = torch.operator "onnx.Unsqueeze"(%17029, %17036) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17038 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17039 = torch.operator "onnx.Add"(%17037, %17038) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %17040 = torch.operator "onnx.Mul"(%17035, %17039) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17041 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17042 = torch.operator "onnx.Unsqueeze"(%17026, %17041) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17043 = torch.operator "onnx.Add"(%17040, %17042) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17044 = torch.operator 
"onnx.MatMul"(%17043, %1134) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17045 = torch.operator "onnx.Add"(%693, %17044) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17046 = torch.operator "onnx.Mul"(%17045, %17045) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17047 = torch.operator "onnx.Mul"(%17045, %17046) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17048 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17049 = torch.operator "onnx.Mul"(%17048, %17047) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17050 = torch.operator "onnx.Add"(%17045, %17049) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17051 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17052 = torch.operator "onnx.Mul"(%17051, %17050) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17053 = torch.operator "onnx.Tanh"(%17052) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17054 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17055 = torch.operator "onnx.Add"(%17054, %17053) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17056 = torch.operator "onnx.Mul"(%17045, %17055) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17057 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17058 = torch.operator "onnx.Mul"(%17057, %17056) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17059 = torch.operator "onnx.Shape"(%17043) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %17060 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17061 = torch.operator "onnx.Gather"(%17059, %17060) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17062 = torch.operator "onnx.MatMul"(%17043, %1135) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17063 = torch.operator "onnx.Add"(%697, %17062) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17064 = torch.operator "onnx.MatMul"(%17043, %1136) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17065 = torch.operator "onnx.Add"(%698, %17064) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17066 = torch.operator "onnx.MatMul"(%17043, %1137) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17067 = torch.operator "onnx.Add"(%699, %17066) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17068 = torch.operator "onnx.Shape"(%17065) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %17069 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17070 = torch.operator "onnx.Gather"(%17068, %17069) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17071 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17072 = torch.operator "onnx.Div"(%17070, %17071) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17073 = torch.operator "onnx.Cast"(%17072) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17074 = torch.operator "onnx.Cast"(%17073) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17075 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21617_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17076 = torch.operator "onnx.Unsqueeze"(%17061, %17075) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17077 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17078 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17079 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21621_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17080 = torch.operator "onnx.Unsqueeze"(%17074, %17079) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17081 = torch.operator "onnx.Concat"(%17076, %17077, %17078, %17080) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17082 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21624_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17083 = torch.operator "onnx.Unsqueeze"(%17061, %17082) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17084 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17085 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17086 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21628_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17087 = torch.operator "onnx.Unsqueeze"(%17074, %17086) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17088 = torch.operator "onnx.Concat"(%17083, %17084, %17085, %17087) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17089 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21631_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17090 = torch.operator "onnx.Unsqueeze"(%17061, %17089) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17091 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17092 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17093 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21635_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17094 = torch.operator "onnx.Unsqueeze"(%17074, %17093) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17095 = torch.operator "onnx.Concat"(%17090, %17091, %17092, %17094) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17096 = torch.operator "onnx.Reshape"(%17063, %17081) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17097 = torch.operator "onnx.Transpose"(%17096) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17098 = torch.operator "onnx.Reshape"(%17065, %17088) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17099 = torch.operator "onnx.Transpose"(%17098) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17100 = torch.operator "onnx.Reshape"(%17067, %17095) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17101 = torch.operator "onnx.Transpose"(%17100) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17102 = torch.operator "onnx.Cast"(%17097) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17103 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17104 = torch.operator "onnx.Pow"(%17102, %17103) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17105 = torch.operator "onnx.ReduceMean"(%17104) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17106 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17107 = torch.operator "onnx.Add"(%17105, %17106) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17108 = torch.operator "onnx.Sqrt"(%17107) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17109 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17110 = torch.operator "onnx.Div"(%17109, %17108) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17111 = torch.operator "onnx.Cast"(%17097) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17112 = torch.operator "onnx.Mul"(%17111, %17110) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17113 = torch.operator "onnx.Cast"(%17112) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %17114 = torch.operator "onnx.Mul"(%17113, %695) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %17115 = torch.operator "onnx.Cast"(%17099) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17116 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17117 = torch.operator "onnx.Pow"(%17115, %17116) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17118 = torch.operator "onnx.ReduceMean"(%17117) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17119 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17120 = torch.operator "onnx.Add"(%17118, %17119) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17121 = torch.operator "onnx.Sqrt"(%17120) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17122 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17123 = torch.operator "onnx.Div"(%17122, %17121) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17124 = torch.operator "onnx.Cast"(%17099) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17125 = torch.operator "onnx.Mul"(%17124, %17123) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17126 = torch.operator "onnx.Cast"(%17125) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %17127 = torch.operator "onnx.Mul"(%17126, %696) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %17128 = torch.operator "onnx.Shape"(%17114) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17129 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17130 = torch.operator "onnx.Gather"(%17128, %17129) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17131 = torch.operator "onnx.Shape"(%17114) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17132 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17133 = torch.operator "onnx.Gather"(%17131, %17132) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17134 = torch.operator "onnx.Shape"(%17114) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17135 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17136 = torch.operator "onnx.Gather"(%17134, %17135) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17137 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21679_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17138 = torch.operator "onnx.Unsqueeze"(%17130, %17137) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17139 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21681_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17140 = torch.operator "onnx.Unsqueeze"(%17133, %17139) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17141 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21683_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17142 = torch.operator "onnx.Unsqueeze"(%17136, %17141) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17143 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.33_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17145 = torch.operator "onnx.Concat"(%17138, %17140, %17142, %17143, %17144) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %17146 = torch.operator "onnx.Reshape"(%17114, %17145) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17147 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %17148:2 = torch.operator "onnx.Split"(%17146, %17147) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %17149 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17150 = torch.operator "onnx.Squeeze"(%17148#0, %17149) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17151 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17152 = torch.operator "onnx.Squeeze"(%17148#1, %17151) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17153 = torch.operator "onnx.Neg"(%17152) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17154 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17155 = torch.operator "onnx.Unsqueeze"(%17153, %17154) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17156 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17157 = torch.operator "onnx.Unsqueeze"(%17150, %17156) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17158 = torch.operator "onnx.Concat"(%17155, %17157) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17159 = torch.operator "onnx.Shape"(%17158) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %17160 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17161 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17162 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17163 = torch.operator "onnx.Slice"(%17159, %17161, %17162, %17160) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17164 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17165 = torch.operator "onnx.Concat"(%17163, %17164) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Layout note: the operations below are written one per line. The first line
// above finishes an operation begun on the previous line, and the last line of
// this span starts an operation that is finished on the following line.
%17166 = torch.operator "onnx.Reshape"(%17158, %17165) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
// Combine %17114 and the reshaped %17166 in f32: %17114 * %1455 + %17166 * %1456,
// then cast the sum back to bf16 (ONNX dtype 1 = f32, 16 = bf16).
// NOTE(review): this looks like a rotary-embedding application with %1455/%1456
// as the cos/sin tables -- confirm against the exporting model.
%17167 = torch.operator "onnx.Cast"(%17114) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32>
%17168 = torch.operator "onnx.Mul"(%17167, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%17169 = torch.operator "onnx.Cast"(%17166) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%17170 = torch.operator "onnx.Mul"(%17169, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%17171 = torch.operator "onnx.Add"(%17168, %17170) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32>
%17172 = torch.operator "onnx.Cast"(%17171) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16>
// Read three entries of the shape of %17127 with scalar Gather indices.
// The index constants are rank-0; their attribute type is spelled tensor<si64>
// so that it is a well-formed tensor type and matches the declared
// !torch.vtensor<[],si64> result (it was previously a bare `tensor`).
%17173 = torch.operator "onnx.Shape"(%17127) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%17174 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_23_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17175 = torch.operator "onnx.Gather"(%17173, %17174) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%17176 = torch.operator "onnx.Shape"(%17127) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%17177 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_24_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17178 = torch.operator "onnx.Gather"(%17176, %17177) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%17179 = torch.operator "onnx.Shape"(%17127) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%17180 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_25_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17181 = torch.operator "onnx.Gather"(%17179, %17180) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Turn each gathered scalar into a 1-element vector and concatenate them with
// two constant entries into the 5-element target shape used by the next Reshape.
%17182 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21724_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17183 = torch.operator "onnx.Unsqueeze"(%17175, %17182) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17184 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21726_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17185 = torch.operator "onnx.Unsqueeze"(%17178, %17184) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17186 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21728_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17187 = torch.operator "onnx.Unsqueeze"(%17181, %17186) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17188 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17189 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17190 = torch.operator "onnx.Concat"(%17183, %17185, %17187, %17188, %17189) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>,
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %17191 = torch.operator "onnx.Reshape"(%17127, %17190) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17192 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %17193:2 = torch.operator "onnx.Split"(%17191, %17192) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %17194 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17195 = torch.operator "onnx.Squeeze"(%17193#0, %17194) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17196 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17197 = torch.operator "onnx.Squeeze"(%17193#1, %17196) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17198 = torch.operator "onnx.Neg"(%17197) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17199 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17200 = torch.operator "onnx.Unsqueeze"(%17198, %17199) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17201 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.33_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17202 = torch.operator "onnx.Unsqueeze"(%17195, %17201) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17203 = torch.operator "onnx.Concat"(%17200, %17202) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17204 = torch.operator "onnx.Shape"(%17203) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %17205 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17206 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17207 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17208 = torch.operator "onnx.Slice"(%17204, %17206, %17207, %17205) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17209 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17210 = torch.operator "onnx.Concat"(%17208, %17209) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17211 = torch.operator "onnx.Reshape"(%17203, %17210) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17212 = torch.operator "onnx.Cast"(%17127) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %17213 = torch.operator "onnx.Mul"(%17212, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17214 = torch.operator "onnx.Cast"(%17211) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17215 = torch.operator "onnx.Mul"(%17214, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17216 = torch.operator "onnx.Add"(%17213, %17215) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17217 = torch.operator "onnx.Cast"(%17216) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %17218 = torch.operator "onnx.Shape"(%17172) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %17219 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17220 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17221 = torch.operator "onnx.Slice"(%17218, %17219, %17220) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17222 = torch.operator "onnx.Cast"(%17221) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %17223 = torch.operator "onnx.Sqrt"(%17222) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %17225 = torch.operator "onnx.Cast"(%17223) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32>
// Layout note: the operations below are written one per line. The first line
// above finishes an operation begun on the previous line, and the last line of
// this span starts an operation that is finished on the following line.
// scale = %17224 / sqrt(sliced shape entry), computed in f32 and cast to bf16.
%17226 = torch.operator "onnx.Div"(%17224, %17225) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32>
%17227 = torch.operator "onnx.Cast"(%17226) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16>
// Swap the last two axes of %17217 so it can be the right-hand MatMul operand.
%17228 = torch.operator "onnx.Transpose"(%17217) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Both MatMul operands are multiplied by sqrt(scale), so their product carries
// the full scale factor.
%17229 = torch.operator "onnx.Sqrt"(%17227) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%17230 = torch.operator "onnx.Mul"(%17172, %17229) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16>
%17231 = torch.operator "onnx.Sqrt"(%17227) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16>
%17232 = torch.operator "onnx.Mul"(%17228, %17231) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16>
// Scores -> softmax over the last axis -> weighted sum with %17101.
%17233 = torch.operator "onnx.MatMul"(%17230, %17232) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%17234 = torch.operator "onnx.Softmax"(%17233) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16>
%17235 = torch.operator "onnx.MatMul"(%17234, %17101) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16>
// Swap axes 1 and 2 of the attention output.
%17236 = torch.operator "onnx.Transpose"(%17235) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16>
// Rank-0 multiplier: the attribute type is spelled tensor<si64> so that it is a
// well-formed tensor type and matches the declared !torch.vtensor<[],si64>
// result (it was previously a bare `tensor`).
%17237 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_40_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17238 = torch.operator "onnx.Mul"(%17074, %17237) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) ->
!torch.vtensor<[],si64> %17239 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21781_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17240 = torch.operator "onnx.Unsqueeze"(%17061, %17239) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17241 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17242 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21784_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17243 = torch.operator "onnx.Unsqueeze"(%17238, %17242) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17244 = torch.operator "onnx.Concat"(%17240, %17241, %17243) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17245 = torch.operator "onnx.Reshape"(%17236, %17244) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %17246 = torch.operator "onnx.Cast"(%17245) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %17247 = torch.operator "onnx.Concat"(%17246, %17058) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %17248 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.33_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17249 = torch.operator "onnx.Unsqueeze"(%17032, %17248) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17250 = torch.operator "onnx.MatMul"(%17247, %1138) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %17251 = torch.operator "onnx.Add"(%694, %17250) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17252 = torch.operator "onnx.Mul"(%17249, %17251) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17253 = torch.operator "onnx.Add"(%17014, %17252) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17254 = torch.operator "onnx.Gemm"(%1285, %700, %701) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %17255 = torch.operator "onnx.Shape"(%17254) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %17256 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17257 = torch.operator "onnx.Gather"(%17255, %17256) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17258 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17259 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17260 = torch.operator "onnx.Add"(%17257, %17259) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17261 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17262 = torch.operator 
"onnx.Div"(%17260, %17261) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17263 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17264 = torch.operator "onnx.Mul"(%17262, %17263) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17265 = torch.operator "onnx.Slice"(%17254, %17258, %17264, %17256) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17266 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17267 = torch.operator "onnx.Mul"(%17262, %17266) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17268 = torch.operator "onnx.Slice"(%17254, %17264, %17267, %17256) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17269 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17270 = torch.operator "onnx.Mul"(%17262, %17269) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17271 = torch.operator "onnx.Slice"(%17254, %17267, %17270, %17256) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.342Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.34/norm/norm/Constant_attr__value" : tensor<3072xbf16> %17272 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.342Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// Layout note: the operations below are written one per line. The first line
// above finishes an operation begun on the previous line, and the last line of
// this span starts an operation that is finished on the following line.
// Every rank-0 onnx.Constant in this span spells its attribute type with an
// explicit element type (tensor<bf16> / tensor<si64>) taken from the op's
// declared result type; these were previously a bare `tensor`.
%_2Fsingle_transformer_blocks.342Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.34/norm/norm/Constant_1_attr__value" : tensor<3072xbf16>
%17273 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.342Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16>
// LayerNorm over the last axis of %17253, then modulate:
// %17274 * (unsqueeze(%17268) + const) + unsqueeze(%17265).
%17274 = torch.operator "onnx.LayerNormalization"(%17253, %17272, %17273) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%17275 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17276 = torch.operator "onnx.Unsqueeze"(%17268, %17275) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%17277 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_8_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%17278 = torch.operator "onnx.Add"(%17276, %17277) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16>
%17279 = torch.operator "onnx.Mul"(%17274, %17278) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%17280 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17281 = torch.operator "onnx.Unsqueeze"(%17265, %17280) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16>
%17282 = torch.operator "onnx.Add"(%17279, %17281) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// Linear projection 3072 -> 12288 with bias %702.
%17283 = torch.operator "onnx.MatMul"(%17282, %1139) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17284 = torch.operator "onnx.Add"(%702, %17283) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// Activation: c3 * x * (c2 + tanh(c1 * (x + c0 * x^3))) with x = %17284.
// NOTE(review): this has the shape of the tanh-based GELU approximation; the
// constant values live in dense resources that are not visible here -- confirm.
%17285 = torch.operator "onnx.Mul"(%17284, %17284) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17286 = torch.operator "onnx.Mul"(%17284, %17285) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17287 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_act_mlp_Constant_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%17288 = torch.operator "onnx.Mul"(%17287, %17286) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17289 = torch.operator "onnx.Add"(%17284, %17288) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17290 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_act_mlp_Constant_1_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%17291 = torch.operator "onnx.Mul"(%17290, %17289) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17292 = torch.operator "onnx.Tanh"(%17291) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17293 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_act_mlp_Constant_2_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%17294 = torch.operator "onnx.Add"(%17293, %17292) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17295 = torch.operator "onnx.Mul"(%17284, %17294) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
%17296 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_act_mlp_Constant_3_attr__value> : tensor<bf16>} : () -> !torch.vtensor<[],bf16>
%17297 = torch.operator "onnx.Mul"(%17296, %17295) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16>
// One scalar entry of the shape of %17282, reused below when building reshape targets.
%17298 = torch.operator "onnx.Shape"(%17282) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%17299 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17300 = torch.operator "onnx.Gather"(%17298, %17299) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Three 3072 -> 3072 linear projections of %17282 with biases %706, %707, %708.
%17301 = torch.operator "onnx.MatMul"(%17282, %1140) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%17302 = torch.operator "onnx.Add"(%706, %17301) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%17303 = torch.operator "onnx.MatMul"(%17282, %1141) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%17304 = torch.operator "onnx.Add"(%707, %17303) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%17305 = torch.operator "onnx.MatMul"(%17282, %1142) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
%17306 = torch.operator "onnx.Add"(%708, %17305) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16>
// Gather one entry of the shape of %17304 and divide it by a scalar constant;
// the two Casts to ONNX dtype 7 (int64) keep the value as si64.
%17307 = torch.operator "onnx.Shape"(%17304) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64>
%17308 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_1_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17309 = torch.operator "onnx.Gather"(%17307, %17308) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%17310 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_2_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17311 = torch.operator "onnx.Div"(%17309, %17310) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%17312 = torch.operator "onnx.Cast"(%17311) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%17313 = torch.operator "onnx.Cast"(%17312) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
// Assemble the 4-element reshape target from %17300, two constants and %17313.
%17314 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21856_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17315 = torch.operator "onnx.Unsqueeze"(%17300, %17314) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17316 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17317 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17318 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21860_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17319 = torch.operator "onnx.Unsqueeze"(%17313, %17318) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17320 = torch.operator "onnx.Concat"(%17315, %17316, %17317, %17319) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>,
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17321 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21863_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17322 = torch.operator "onnx.Unsqueeze"(%17300, %17321) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17323 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17324 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17325 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21867_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17326 = torch.operator "onnx.Unsqueeze"(%17313, %17325) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17327 = torch.operator "onnx.Concat"(%17322, %17323, %17324, %17326) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17328 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21870_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17329 = torch.operator "onnx.Unsqueeze"(%17300, %17328) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17330 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17331 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17332 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21874_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
// Layout note: the operations below are written one per line. The first line
// above finishes an operation begun on the previous line, and the last line of
// this span starts an operation that is finished on the following line.
// Every rank-0 onnx.Constant in this span spells its attribute type with an
// explicit element type (tensor<f32> / tensor<si64>) taken from the op's
// declared result type; these were previously a bare `tensor`.
%17333 = torch.operator "onnx.Unsqueeze"(%17313, %17332) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17334 = torch.operator "onnx.Concat"(%17329, %17330, %17331, %17333) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64>
// Reshape each of the three projections to rank 4 and swap axes 1 and 2.
%17335 = torch.operator "onnx.Reshape"(%17302, %17320) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%17336 = torch.operator "onnx.Transpose"(%17335) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%17337 = torch.operator "onnx.Reshape"(%17304, %17327) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%17338 = torch.operator "onnx.Transpose"(%17337) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
%17339 = torch.operator "onnx.Reshape"(%17306, %17334) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16>
%17340 = torch.operator "onnx.Transpose"(%17339) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16>
// Normalise %17336 in f32: x * (c2 / sqrt(mean(pow(x, c0), last axis) + c1)),
// cast back to bf16 and scale by the 128-element weight %704.
// NOTE(review): this has the shape of an RMS norm; the constant values live in
// dense resources that are not visible here -- confirm.
%17341 = torch.operator "onnx.Cast"(%17336) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%17342 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_norm_q_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%17343 = torch.operator "onnx.Pow"(%17341, %17342) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%17344 = torch.operator "onnx.ReduceMean"(%17343) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%17345 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_norm_q_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%17346 = torch.operator "onnx.Add"(%17344, %17345) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%17347 = torch.operator "onnx.Sqrt"(%17346) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%17348 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_norm_q_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%17349 = torch.operator "onnx.Div"(%17348, %17347) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%17350 = torch.operator "onnx.Cast"(%17336) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%17351 = torch.operator "onnx.Mul"(%17350, %17349) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%17352 = torch.operator "onnx.Cast"(%17351) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%17353 = torch.operator "onnx.Mul"(%17352, %704) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Same normalisation sequence applied to %17338, scaled by the weight %705.
%17354 = torch.operator "onnx.Cast"(%17338) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%17355 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_norm_k_Constant_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%17356 = torch.operator "onnx.Pow"(%17354, %17355) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%17357 = torch.operator "onnx.ReduceMean"(%17356) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%17358 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_norm_k_Constant_1_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%17359 = torch.operator "onnx.Add"(%17357, %17358) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%17360 = torch.operator "onnx.Sqrt"(%17359) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%17361 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_norm_k_Constant_2_attr__value> : tensor<f32>} : () -> !torch.vtensor<[],f32>
%17362 = torch.operator "onnx.Div"(%17361, %17360) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32>
%17363 = torch.operator "onnx.Cast"(%17338) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32>
%17364 = torch.operator "onnx.Mul"(%17363, %17362) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32>
%17365 = torch.operator "onnx.Cast"(%17364) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16>
%17366 = torch.operator "onnx.Mul"(%17365, %705) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16>
// Read three entries of the shape of %17353 with scalar Gather indices and
// turn each into a 1-element vector for the upcoming shape Concat.
%17367 = torch.operator "onnx.Shape"(%17353) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%17368 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_9_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17369 = torch.operator "onnx.Gather"(%17367, %17368) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%17370 = torch.operator "onnx.Shape"(%17353) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%17371 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_10_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17372 = torch.operator "onnx.Gather"(%17370, %17371) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%17373 = torch.operator "onnx.Shape"(%17353) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64>
%17374 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_11_attr__value> : tensor<si64>} : () -> !torch.vtensor<[],si64>
%17375 = torch.operator "onnx.Gather"(%17373, %17374) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64>
%17376 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21918_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17377 = torch.operator "onnx.Unsqueeze"(%17369, %17376) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17378 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21920_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17379 = torch.operator "onnx.Unsqueeze"(%17372, %17378) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17380 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21922_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64>
%17381 = torch.operator "onnx.Unsqueeze"(%17375, %17380) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64>
%17382 = torch.operator "onnx.Constant"() {torch.onnx.value =
dense_resource<__single_transformer_blocks.34_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17383 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17384 = torch.operator "onnx.Concat"(%17377, %17379, %17381, %17382, %17383) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %17385 = torch.operator "onnx.Reshape"(%17353, %17384) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17386 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %17387:2 = torch.operator "onnx.Split"(%17385, %17386) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %17388 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17389 = torch.operator "onnx.Squeeze"(%17387#0, %17388) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17390 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17391 = torch.operator "onnx.Squeeze"(%17387#1, %17390) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17392 = torch.operator "onnx.Neg"(%17391) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17393 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17394 = torch.operator "onnx.Unsqueeze"(%17392, %17393) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17395 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17396 = torch.operator "onnx.Unsqueeze"(%17389, %17395) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17397 = torch.operator "onnx.Concat"(%17394, %17396) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17398 = torch.operator "onnx.Shape"(%17397) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %17399 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17400 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17401 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17402 = torch.operator "onnx.Slice"(%17398, %17400, %17401, %17399) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17403 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17404 = torch.operator "onnx.Concat"(%17402, %17403) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17405 = torch.operator "onnx.Reshape"(%17397, %17404) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17406 = torch.operator "onnx.Cast"(%17353) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %17407 = torch.operator "onnx.Mul"(%17406, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17408 = torch.operator "onnx.Cast"(%17405) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17409 = torch.operator "onnx.Mul"(%17408, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17410 = torch.operator "onnx.Add"(%17407, %17409) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17411 = torch.operator "onnx.Cast"(%17410) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %17412 = torch.operator "onnx.Shape"(%17366) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17413 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17414 = torch.operator "onnx.Gather"(%17412, %17413) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17415 = torch.operator "onnx.Shape"(%17366) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17416 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17417 = torch.operator 
"onnx.Gather"(%17415, %17416) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17418 = torch.operator "onnx.Shape"(%17366) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17419 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17420 = torch.operator "onnx.Gather"(%17418, %17419) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17421 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21963_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17422 = torch.operator "onnx.Unsqueeze"(%17414, %17421) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17423 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21965_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17424 = torch.operator "onnx.Unsqueeze"(%17417, %17423) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17425 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_21967_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17426 = torch.operator "onnx.Unsqueeze"(%17420, %17425) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17427 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17428 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17429 = torch.operator "onnx.Concat"(%17422, %17424, %17426, %17427, %17428) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %17430 = torch.operator "onnx.Reshape"(%17366, %17429) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17431 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %17432:2 = torch.operator "onnx.Split"(%17430, %17431) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %17433 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17434 = torch.operator "onnx.Squeeze"(%17432#0, %17433) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17435 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17436 = torch.operator "onnx.Squeeze"(%17432#1, %17435) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17437 = torch.operator "onnx.Neg"(%17436) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17438 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17439 = torch.operator "onnx.Unsqueeze"(%17437, %17438) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17440 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.34_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17441 = torch.operator "onnx.Unsqueeze"(%17434, %17440) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17442 = torch.operator "onnx.Concat"(%17439, %17441) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17443 = torch.operator "onnx.Shape"(%17442) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %17444 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17445 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17446 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17447 = torch.operator "onnx.Slice"(%17443, %17445, %17446, %17444) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17448 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17449 = torch.operator "onnx.Concat"(%17447, %17448) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17450 = torch.operator "onnx.Reshape"(%17442, %17449) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17451 = torch.operator "onnx.Cast"(%17366) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %17452 = torch.operator "onnx.Mul"(%17451, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17453 = torch.operator "onnx.Cast"(%17450) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17454 = torch.operator "onnx.Mul"(%17453, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17455 = torch.operator "onnx.Add"(%17452, %17454) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17456 = torch.operator "onnx.Cast"(%17455) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %17457 = torch.operator "onnx.Shape"(%17411) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %17458 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17459 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17460 = torch.operator "onnx.Slice"(%17457, %17458, %17459) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17461 = torch.operator "onnx.Cast"(%17460) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %17462 = torch.operator "onnx.Sqrt"(%17461) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17463 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %17464 = torch.operator "onnx.Cast"(%17462) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %17465 = torch.operator "onnx.Div"(%17463, %17464) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %17466 = torch.operator "onnx.Cast"(%17465) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %17467 = torch.operator "onnx.Transpose"(%17456) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %17468 = torch.operator "onnx.Sqrt"(%17466) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17469 = torch.operator "onnx.Mul"(%17411, %17468) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %17470 = torch.operator "onnx.Sqrt"(%17466) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17471 = torch.operator "onnx.Mul"(%17467, %17470) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %17472 = torch.operator "onnx.MatMul"(%17469, %17471) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %17473 = torch.operator "onnx.Softmax"(%17472) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %17474 = torch.operator "onnx.MatMul"(%17473, %17340) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %17475 = torch.operator "onnx.Transpose"(%17474) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %17476 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17477 = torch.operator "onnx.Mul"(%17313, %17476) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %17478 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22020_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17479 = torch.operator "onnx.Unsqueeze"(%17300, %17478) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17480 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17481 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22023_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17482 = torch.operator "onnx.Unsqueeze"(%17477, %17481) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17483 = torch.operator "onnx.Concat"(%17479, %17480, %17482) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17484 = torch.operator "onnx.Reshape"(%17475, %17483) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %17485 = torch.operator "onnx.Cast"(%17484) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %17486 = torch.operator "onnx.Concat"(%17485, %17297) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %17487 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.34_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17488 = torch.operator "onnx.Unsqueeze"(%17271, %17487) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17489 = torch.operator "onnx.MatMul"(%17486, %1143) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %17490 = torch.operator "onnx.Add"(%703, %17489) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17491 = torch.operator "onnx.Mul"(%17488, %17490) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17492 = torch.operator "onnx.Add"(%17253, %17491) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17493 = torch.operator "onnx.Gemm"(%1285, %709, %710) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %17494 = torch.operator "onnx.Shape"(%17493) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %17495 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17496 = torch.operator "onnx.Gather"(%17494, %17495) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17497 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17498 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17499 = torch.operator "onnx.Add"(%17496, %17498) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17500 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17501 = torch.operator 
"onnx.Div"(%17499, %17500) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17502 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17503 = torch.operator "onnx.Mul"(%17501, %17502) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17504 = torch.operator "onnx.Slice"(%17493, %17497, %17503, %17495) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17505 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17506 = torch.operator "onnx.Mul"(%17501, %17505) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17507 = torch.operator "onnx.Slice"(%17493, %17503, %17506, %17495) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17508 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17509 = torch.operator "onnx.Mul"(%17501, %17508) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17510 = torch.operator "onnx.Slice"(%17493, %17506, %17509, %17495) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.352Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.35/norm/norm/Constant_attr__value" : tensor<3072xbf16> %17511 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.352Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.352Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.35/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %17512 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.352Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %17513 = torch.operator "onnx.LayerNormalization"(%17492, %17511, %17512) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17514 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17515 = torch.operator "onnx.Unsqueeze"(%17507, %17514) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17516 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17517 = torch.operator "onnx.Add"(%17515, %17516) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %17518 = torch.operator "onnx.Mul"(%17513, %17517) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17519 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17520 = torch.operator "onnx.Unsqueeze"(%17504, %17519) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17521 = torch.operator "onnx.Add"(%17518, %17520) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17522 = torch.operator 
"onnx.MatMul"(%17521, %1144) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17523 = torch.operator "onnx.Add"(%711, %17522) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17524 = torch.operator "onnx.Mul"(%17523, %17523) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17525 = torch.operator "onnx.Mul"(%17523, %17524) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17526 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17527 = torch.operator "onnx.Mul"(%17526, %17525) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17528 = torch.operator "onnx.Add"(%17523, %17527) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17529 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17530 = torch.operator "onnx.Mul"(%17529, %17528) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17531 = torch.operator "onnx.Tanh"(%17530) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17532 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17533 = torch.operator "onnx.Add"(%17532, %17531) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17534 = torch.operator "onnx.Mul"(%17523, %17533) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17535 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17536 = torch.operator "onnx.Mul"(%17535, %17534) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17537 = torch.operator "onnx.Shape"(%17521) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %17538 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17539 = torch.operator "onnx.Gather"(%17537, %17538) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17540 = torch.operator "onnx.MatMul"(%17521, %1145) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17541 = torch.operator "onnx.Add"(%715, %17540) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17542 = torch.operator "onnx.MatMul"(%17521, %1146) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17543 = torch.operator "onnx.Add"(%716, %17542) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17544 = torch.operator "onnx.MatMul"(%17521, %1147) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17545 = torch.operator "onnx.Add"(%717, %17544) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17546 = torch.operator "onnx.Shape"(%17543) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %17547 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17548 = torch.operator "onnx.Gather"(%17546, %17547) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17549 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17550 = torch.operator "onnx.Div"(%17548, %17549) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17551 = torch.operator "onnx.Cast"(%17550) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17552 = torch.operator "onnx.Cast"(%17551) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17553 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22095_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17554 = torch.operator "onnx.Unsqueeze"(%17539, %17553) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17555 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17556 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17557 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22099_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17558 = torch.operator "onnx.Unsqueeze"(%17552, %17557) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17559 = torch.operator "onnx.Concat"(%17554, %17555, %17556, %17558) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17560 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22102_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17561 = torch.operator "onnx.Unsqueeze"(%17539, %17560) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17562 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17563 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17564 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22106_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17565 = torch.operator "onnx.Unsqueeze"(%17552, %17564) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17566 = torch.operator "onnx.Concat"(%17561, %17562, %17563, %17565) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17567 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22109_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17568 = torch.operator "onnx.Unsqueeze"(%17539, %17567) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17569 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17570 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17571 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22113_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17572 = torch.operator "onnx.Unsqueeze"(%17552, %17571) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17573 = torch.operator "onnx.Concat"(%17568, %17569, %17570, %17572) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17574 = torch.operator "onnx.Reshape"(%17541, %17559) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17575 = torch.operator "onnx.Transpose"(%17574) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17576 = torch.operator "onnx.Reshape"(%17543, %17566) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17577 = torch.operator "onnx.Transpose"(%17576) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17578 = torch.operator "onnx.Reshape"(%17545, %17573) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17579 = torch.operator "onnx.Transpose"(%17578) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17580 = torch.operator "onnx.Cast"(%17575) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17581 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17582 = torch.operator "onnx.Pow"(%17580, %17581) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17583 = torch.operator "onnx.ReduceMean"(%17582) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17584 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17585 = torch.operator "onnx.Add"(%17583, %17584) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17586 = torch.operator "onnx.Sqrt"(%17585) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17587 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17588 = torch.operator "onnx.Div"(%17587, %17586) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17589 = torch.operator "onnx.Cast"(%17575) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17590 = torch.operator "onnx.Mul"(%17589, %17588) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17591 = torch.operator "onnx.Cast"(%17590) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %17592 = torch.operator "onnx.Mul"(%17591, %713) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %17593 = torch.operator "onnx.Cast"(%17577) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17594 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17595 = torch.operator "onnx.Pow"(%17593, %17594) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17596 = torch.operator "onnx.ReduceMean"(%17595) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17597 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17598 = torch.operator "onnx.Add"(%17596, %17597) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17599 = torch.operator "onnx.Sqrt"(%17598) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17600 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17601 = torch.operator "onnx.Div"(%17600, %17599) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17602 = torch.operator "onnx.Cast"(%17577) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17603 = torch.operator "onnx.Mul"(%17602, %17601) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17604 = torch.operator "onnx.Cast"(%17603) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %17605 = torch.operator "onnx.Mul"(%17604, %714) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %17606 = torch.operator "onnx.Shape"(%17592) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17607 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17608 = torch.operator "onnx.Gather"(%17606, %17607) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17609 = torch.operator "onnx.Shape"(%17592) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17610 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17611 = torch.operator "onnx.Gather"(%17609, %17610) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17612 = torch.operator "onnx.Shape"(%17592) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17613 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17614 = torch.operator "onnx.Gather"(%17612, %17613) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17615 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22157_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17616 = torch.operator "onnx.Unsqueeze"(%17608, %17615) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17617 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22159_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17618 = torch.operator "onnx.Unsqueeze"(%17611, %17617) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17619 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22161_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17620 = torch.operator "onnx.Unsqueeze"(%17614, %17619) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17621 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.35_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17622 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17623 = torch.operator "onnx.Concat"(%17616, %17618, %17620, %17621, %17622) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %17624 = torch.operator "onnx.Reshape"(%17592, %17623) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17625 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %17626:2 = torch.operator "onnx.Split"(%17624, %17625) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %17627 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17628 = torch.operator "onnx.Squeeze"(%17626#0, %17627) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17629 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17630 = torch.operator "onnx.Squeeze"(%17626#1, %17629) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17631 = torch.operator "onnx.Neg"(%17630) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17632 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17633 = torch.operator "onnx.Unsqueeze"(%17631, %17632) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17634 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17635 = torch.operator "onnx.Unsqueeze"(%17628, %17634) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17636 = torch.operator "onnx.Concat"(%17633, %17635) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17637 = torch.operator "onnx.Shape"(%17636) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %17638 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17639 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17640 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17641 = torch.operator "onnx.Slice"(%17637, %17639, %17640, %17638) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17642 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17643 = torch.operator "onnx.Concat"(%17641, %17642) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17644 = torch.operator "onnx.Reshape"(%17636, %17643) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17645 = torch.operator "onnx.Cast"(%17592) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %17646 = torch.operator "onnx.Mul"(%17645, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17647 = torch.operator "onnx.Cast"(%17644) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17648 = torch.operator "onnx.Mul"(%17647, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17649 = torch.operator "onnx.Add"(%17646, %17648) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17650 = torch.operator "onnx.Cast"(%17649) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %17651 = torch.operator "onnx.Shape"(%17605) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17652 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17653 = torch.operator "onnx.Gather"(%17651, %17652) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17654 = torch.operator "onnx.Shape"(%17605) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17655 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17656 = torch.operator 
"onnx.Gather"(%17654, %17655) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17657 = torch.operator "onnx.Shape"(%17605) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17658 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17659 = torch.operator "onnx.Gather"(%17657, %17658) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17660 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22202_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17661 = torch.operator "onnx.Unsqueeze"(%17653, %17660) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17662 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22204_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17663 = torch.operator "onnx.Unsqueeze"(%17656, %17662) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17664 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22206_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17665 = torch.operator "onnx.Unsqueeze"(%17659, %17664) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17666 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17667 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17668 = torch.operator "onnx.Concat"(%17661, %17663, %17665, %17666, %17667) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %17669 = torch.operator "onnx.Reshape"(%17605, %17668) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17670 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %17671:2 = torch.operator "onnx.Split"(%17669, %17670) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %17672 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17673 = torch.operator "onnx.Squeeze"(%17671#0, %17672) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17674 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17675 = torch.operator "onnx.Squeeze"(%17671#1, %17674) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17676 = torch.operator "onnx.Neg"(%17675) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17677 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17678 = torch.operator "onnx.Unsqueeze"(%17676, %17677) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17679 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.35_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17680 = torch.operator "onnx.Unsqueeze"(%17673, %17679) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17681 = torch.operator "onnx.Concat"(%17678, %17680) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17682 = torch.operator "onnx.Shape"(%17681) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %17683 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17684 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17685 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17686 = torch.operator "onnx.Slice"(%17682, %17684, %17685, %17683) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17687 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17688 = torch.operator "onnx.Concat"(%17686, %17687) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17689 = torch.operator "onnx.Reshape"(%17681, %17688) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17690 = torch.operator "onnx.Cast"(%17605) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %17691 = torch.operator "onnx.Mul"(%17690, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17692 = torch.operator "onnx.Cast"(%17689) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17693 = torch.operator "onnx.Mul"(%17692, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17694 = torch.operator "onnx.Add"(%17691, %17693) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17695 = torch.operator "onnx.Cast"(%17694) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %17696 = torch.operator "onnx.Shape"(%17650) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %17697 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17698 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17699 = torch.operator "onnx.Slice"(%17696, %17697, %17698) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17700 = torch.operator "onnx.Cast"(%17699) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %17701 = torch.operator "onnx.Sqrt"(%17700) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17702 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %17703 = torch.operator "onnx.Cast"(%17701) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %17704 = torch.operator "onnx.Div"(%17702, %17703) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %17705 = torch.operator "onnx.Cast"(%17704) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %17706 = torch.operator "onnx.Transpose"(%17695) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %17707 = torch.operator "onnx.Sqrt"(%17705) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17708 = torch.operator "onnx.Mul"(%17650, %17707) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %17709 = torch.operator "onnx.Sqrt"(%17705) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17710 = torch.operator "onnx.Mul"(%17706, %17709) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %17711 = torch.operator "onnx.MatMul"(%17708, %17710) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %17712 = torch.operator "onnx.Softmax"(%17711) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %17713 = torch.operator "onnx.MatMul"(%17712, %17579) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %17714 = torch.operator "onnx.Transpose"(%17713) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %17715 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17716 = torch.operator "onnx.Mul"(%17552, %17715) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %17717 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22259_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17718 = torch.operator "onnx.Unsqueeze"(%17539, %17717) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17719 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17720 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22262_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17721 = torch.operator "onnx.Unsqueeze"(%17716, %17720) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17722 = torch.operator "onnx.Concat"(%17718, %17719, %17721) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17723 = torch.operator "onnx.Reshape"(%17714, %17722) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %17724 = torch.operator "onnx.Cast"(%17723) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %17725 = torch.operator "onnx.Concat"(%17724, %17536) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %17726 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.35_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17727 = torch.operator "onnx.Unsqueeze"(%17510, %17726) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17728 = torch.operator "onnx.MatMul"(%17725, %1148) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %17729 = torch.operator "onnx.Add"(%712, %17728) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17730 = torch.operator "onnx.Mul"(%17727, %17729) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17731 = torch.operator "onnx.Add"(%17492, %17730) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17732 = torch.operator "onnx.Gemm"(%1285, %718, %719) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %17733 = torch.operator "onnx.Shape"(%17732) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %17734 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17735 = torch.operator "onnx.Gather"(%17733, %17734) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17736 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17737 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17738 = torch.operator "onnx.Add"(%17735, %17737) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17739 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17740 = torch.operator 
"onnx.Div"(%17738, %17739) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17741 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17742 = torch.operator "onnx.Mul"(%17740, %17741) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17743 = torch.operator "onnx.Slice"(%17732, %17736, %17742, %17734) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17744 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17745 = torch.operator "onnx.Mul"(%17740, %17744) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17746 = torch.operator "onnx.Slice"(%17732, %17742, %17745, %17734) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17747 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17748 = torch.operator "onnx.Mul"(%17740, %17747) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17749 = torch.operator "onnx.Slice"(%17732, %17745, %17748, %17734) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.362Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.36/norm/norm/Constant_attr__value" : tensor<3072xbf16> %17750 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.362Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.362Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.36/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %17751 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.362Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %17752 = torch.operator "onnx.LayerNormalization"(%17731, %17750, %17751) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17753 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17754 = torch.operator "onnx.Unsqueeze"(%17746, %17753) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17755 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17756 = torch.operator "onnx.Add"(%17754, %17755) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %17757 = torch.operator "onnx.Mul"(%17752, %17756) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17758 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17759 = torch.operator "onnx.Unsqueeze"(%17743, %17758) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17760 = torch.operator "onnx.Add"(%17757, %17759) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17761 = torch.operator 
"onnx.MatMul"(%17760, %1149) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17762 = torch.operator "onnx.Add"(%720, %17761) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17763 = torch.operator "onnx.Mul"(%17762, %17762) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17764 = torch.operator "onnx.Mul"(%17762, %17763) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17765 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17766 = torch.operator "onnx.Mul"(%17765, %17764) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17767 = torch.operator "onnx.Add"(%17762, %17766) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17768 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17769 = torch.operator "onnx.Mul"(%17768, %17767) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17770 = torch.operator "onnx.Tanh"(%17769) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17771 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17772 = torch.operator "onnx.Add"(%17771, %17770) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17773 = torch.operator "onnx.Mul"(%17762, %17772) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17774 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17775 = torch.operator "onnx.Mul"(%17774, %17773) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %17776 = torch.operator "onnx.Shape"(%17760) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %17777 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17778 = torch.operator "onnx.Gather"(%17776, %17777) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17779 = torch.operator "onnx.MatMul"(%17760, %1150) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17780 = torch.operator "onnx.Add"(%724, %17779) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17781 = torch.operator "onnx.MatMul"(%17760, %1151) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17782 = torch.operator "onnx.Add"(%725, %17781) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17783 = torch.operator "onnx.MatMul"(%17760, %1152) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17784 = torch.operator "onnx.Add"(%726, %17783) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17785 = torch.operator "onnx.Shape"(%17782) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %17786 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17787 = torch.operator "onnx.Gather"(%17785, %17786) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17788 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17789 = torch.operator "onnx.Div"(%17787, %17788) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17790 = torch.operator "onnx.Cast"(%17789) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17791 = torch.operator "onnx.Cast"(%17790) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17792 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22334_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17793 = torch.operator "onnx.Unsqueeze"(%17778, %17792) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17794 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17795 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17796 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22338_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17797 = torch.operator "onnx.Unsqueeze"(%17791, %17796) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17798 = torch.operator "onnx.Concat"(%17793, %17794, %17795, %17797) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17799 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22341_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17800 = torch.operator "onnx.Unsqueeze"(%17778, %17799) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17801 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17802 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17803 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22345_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17804 = torch.operator "onnx.Unsqueeze"(%17791, %17803) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17805 = torch.operator "onnx.Concat"(%17800, %17801, %17802, %17804) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17806 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22348_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17807 = torch.operator "onnx.Unsqueeze"(%17778, %17806) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17808 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17809 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17810 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22352_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17811 = torch.operator "onnx.Unsqueeze"(%17791, %17810) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17812 = torch.operator "onnx.Concat"(%17807, %17808, %17809, %17811) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17813 = torch.operator "onnx.Reshape"(%17780, %17798) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17814 = torch.operator "onnx.Transpose"(%17813) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17815 = torch.operator "onnx.Reshape"(%17782, %17805) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17816 = torch.operator "onnx.Transpose"(%17815) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17817 = torch.operator "onnx.Reshape"(%17784, %17812) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17818 = torch.operator "onnx.Transpose"(%17817) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17819 = torch.operator "onnx.Cast"(%17814) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17820 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17821 = torch.operator "onnx.Pow"(%17819, %17820) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17822 = torch.operator "onnx.ReduceMean"(%17821) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17823 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17824 = torch.operator "onnx.Add"(%17822, %17823) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17825 = torch.operator "onnx.Sqrt"(%17824) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17826 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17827 = torch.operator "onnx.Div"(%17826, %17825) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17828 = torch.operator "onnx.Cast"(%17814) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17829 = torch.operator "onnx.Mul"(%17828, %17827) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17830 = torch.operator "onnx.Cast"(%17829) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %17831 = torch.operator "onnx.Mul"(%17830, %722) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %17832 = torch.operator "onnx.Cast"(%17816) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17833 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17834 = torch.operator "onnx.Pow"(%17832, %17833) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17835 = torch.operator "onnx.ReduceMean"(%17834) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17836 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17837 = torch.operator "onnx.Add"(%17835, %17836) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17838 = torch.operator "onnx.Sqrt"(%17837) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17839 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %17840 = torch.operator "onnx.Div"(%17839, %17838) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %17841 = torch.operator "onnx.Cast"(%17816) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17842 = torch.operator "onnx.Mul"(%17841, %17840) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %17843 = torch.operator "onnx.Cast"(%17842) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %17844 = torch.operator "onnx.Mul"(%17843, %723) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %17845 = torch.operator "onnx.Shape"(%17831) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17846 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17847 = torch.operator "onnx.Gather"(%17845, %17846) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17848 = torch.operator "onnx.Shape"(%17831) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17849 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17850 = torch.operator "onnx.Gather"(%17848, %17849) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17851 = torch.operator "onnx.Shape"(%17831) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17852 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17853 = torch.operator "onnx.Gather"(%17851, %17852) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17854 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22396_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17855 = torch.operator "onnx.Unsqueeze"(%17847, %17854) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17856 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22398_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17857 = torch.operator "onnx.Unsqueeze"(%17850, %17856) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17858 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22400_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17859 = torch.operator "onnx.Unsqueeze"(%17853, %17858) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17860 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.36_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17861 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17862 = torch.operator "onnx.Concat"(%17855, %17857, %17859, %17860, %17861) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %17863 = torch.operator "onnx.Reshape"(%17831, %17862) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17864 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %17865:2 = torch.operator "onnx.Split"(%17863, %17864) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %17866 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17867 = torch.operator "onnx.Squeeze"(%17865#0, %17866) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17868 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17869 = torch.operator "onnx.Squeeze"(%17865#1, %17868) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17870 = torch.operator "onnx.Neg"(%17869) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17871 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17872 = torch.operator "onnx.Unsqueeze"(%17870, %17871) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17873 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17874 = torch.operator "onnx.Unsqueeze"(%17867, %17873) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17875 = torch.operator "onnx.Concat"(%17872, %17874) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17876 = torch.operator "onnx.Shape"(%17875) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %17877 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17878 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17879 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17880 = torch.operator "onnx.Slice"(%17876, %17878, %17879, %17877) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17881 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17882 = torch.operator "onnx.Concat"(%17880, %17881) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17883 = torch.operator "onnx.Reshape"(%17875, %17882) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17884 = torch.operator "onnx.Cast"(%17831) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %17885 = torch.operator "onnx.Mul"(%17884, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17886 = torch.operator "onnx.Cast"(%17883) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17887 = torch.operator "onnx.Mul"(%17886, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17888 = torch.operator "onnx.Add"(%17885, %17887) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17889 = torch.operator "onnx.Cast"(%17888) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %17890 = torch.operator "onnx.Shape"(%17844) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17891 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17892 = torch.operator "onnx.Gather"(%17890, %17891) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17893 = torch.operator "onnx.Shape"(%17844) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17894 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17895 = torch.operator 
"onnx.Gather"(%17893, %17894) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17896 = torch.operator "onnx.Shape"(%17844) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %17897 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17898 = torch.operator "onnx.Gather"(%17896, %17897) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %17899 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22441_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17900 = torch.operator "onnx.Unsqueeze"(%17892, %17899) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17901 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22443_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17902 = torch.operator "onnx.Unsqueeze"(%17895, %17901) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17903 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22445_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17904 = torch.operator "onnx.Unsqueeze"(%17898, %17903) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17905 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17906 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17907 = torch.operator "onnx.Concat"(%17900, %17902, %17904, %17905, %17906) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %17908 = torch.operator "onnx.Reshape"(%17844, %17907) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17909 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %17910:2 = torch.operator "onnx.Split"(%17908, %17909) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %17911 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17912 = torch.operator "onnx.Squeeze"(%17910#0, %17911) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17913 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17914 = torch.operator "onnx.Squeeze"(%17910#1, %17913) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17915 = torch.operator "onnx.Neg"(%17914) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %17916 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17917 = torch.operator "onnx.Unsqueeze"(%17915, %17916) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17918 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.36_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17919 = torch.operator "onnx.Unsqueeze"(%17912, %17918) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %17920 = torch.operator "onnx.Concat"(%17917, %17919) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %17921 = torch.operator "onnx.Shape"(%17920) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %17922 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17923 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17924 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17925 = torch.operator "onnx.Slice"(%17921, %17923, %17924, %17922) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17926 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17927 = torch.operator "onnx.Concat"(%17925, %17926) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %17928 = torch.operator "onnx.Reshape"(%17920, %17927) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %17929 = torch.operator "onnx.Cast"(%17844) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %17930 = torch.operator "onnx.Mul"(%17929, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17931 = torch.operator "onnx.Cast"(%17928) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %17932 = torch.operator "onnx.Mul"(%17931, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17933 = torch.operator "onnx.Add"(%17930, %17932) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %17934 = torch.operator "onnx.Cast"(%17933) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %17935 = torch.operator "onnx.Shape"(%17889) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %17936 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17937 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17938 = torch.operator "onnx.Slice"(%17935, %17936, %17937) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17939 = torch.operator "onnx.Cast"(%17938) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %17940 = torch.operator "onnx.Sqrt"(%17939) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17941 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %17942 = torch.operator "onnx.Cast"(%17940) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %17943 = torch.operator "onnx.Div"(%17941, %17942) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %17944 = torch.operator "onnx.Cast"(%17943) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %17945 = torch.operator "onnx.Transpose"(%17934) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %17946 = torch.operator "onnx.Sqrt"(%17944) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17947 = torch.operator "onnx.Mul"(%17889, %17946) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %17948 = torch.operator "onnx.Sqrt"(%17944) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %17949 = torch.operator "onnx.Mul"(%17945, %17948) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %17950 = torch.operator "onnx.MatMul"(%17947, %17949) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %17951 = torch.operator "onnx.Softmax"(%17950) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %17952 = torch.operator "onnx.MatMul"(%17951, %17818) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %17953 = torch.operator "onnx.Transpose"(%17952) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %17954 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %17955 = torch.operator "onnx.Mul"(%17791, %17954) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %17956 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22498_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17957 = torch.operator "onnx.Unsqueeze"(%17778, %17956) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17958 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17959 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22501_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17960 = torch.operator "onnx.Unsqueeze"(%17955, %17959) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17961 = torch.operator "onnx.Concat"(%17957, %17958, %17960) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %17962 = torch.operator "onnx.Reshape"(%17953, %17961) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %17963 = torch.operator "onnx.Cast"(%17962) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %17964 = torch.operator "onnx.Concat"(%17963, %17775) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %17965 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.36_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17966 = torch.operator "onnx.Unsqueeze"(%17749, %17965) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17967 = torch.operator "onnx.MatMul"(%17964, %1153) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %17968 = torch.operator "onnx.Add"(%721, %17967) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17969 = torch.operator "onnx.Mul"(%17966, %17968) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17970 = torch.operator "onnx.Add"(%17731, %17969) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17971 = torch.operator "onnx.Gemm"(%1285, %727, %728) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[9216,3072],bf16>, !torch.vtensor<[9216],bf16>) -> !torch.vtensor<[1,9216],bf16> %17972 = torch.operator "onnx.Shape"(%17971) : (!torch.vtensor<[1,9216],bf16>) -> !torch.vtensor<[2],si64> %17973 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17974 = torch.operator "onnx.Gather"(%17972, %17973) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17975 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17976 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17977 = torch.operator "onnx.Add"(%17974, %17976) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17978 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17979 = torch.operator 
"onnx.Div"(%17977, %17978) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17980 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17981 = torch.operator "onnx.Mul"(%17979, %17980) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17982 = torch.operator "onnx.Slice"(%17971, %17975, %17981, %17973) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17983 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17984 = torch.operator "onnx.Mul"(%17979, %17983) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17985 = torch.operator "onnx.Slice"(%17971, %17981, %17984, %17973) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %17986 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17987 = torch.operator "onnx.Mul"(%17979, %17986) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %17988 = torch.operator "onnx.Slice"(%17971, %17984, %17987, %17973) : (!torch.vtensor<[1,9216],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fsingle_transformer_blocks.372Fnorm2Fnorm2FConstant_attr__value = util.global.load @"/single_transformer_blocks.37/norm/norm/Constant_attr__value" : tensor<3072xbf16> %17989 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.372Fnorm2Fnorm2FConstant_attr__value : 
tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fsingle_transformer_blocks.372Fnorm2Fnorm2FConstant_1_attr__value = util.global.load @"/single_transformer_blocks.37/norm/norm/Constant_1_attr__value" : tensor<3072xbf16> %17990 = torch_c.from_builtin_tensor %_2Fsingle_transformer_blocks.372Fnorm2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %17991 = torch.operator "onnx.LayerNormalization"(%17970, %17989, %17990) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17992 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17993 = torch.operator "onnx.Unsqueeze"(%17985, %17992) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17994 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_8_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %17995 = torch.operator "onnx.Add"(%17993, %17994) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,1,?],bf16> %17996 = torch.operator "onnx.Mul"(%17991, %17995) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %17997 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_norm_Constant_9_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %17998 = torch.operator "onnx.Unsqueeze"(%17982, %17997) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %17999 = torch.operator "onnx.Add"(%17996, %17998) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18000 = torch.operator 
"onnx.MatMul"(%17999, %1154) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18001 = torch.operator "onnx.Add"(%729, %18000) : (!torch.vtensor<[12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18002 = torch.operator "onnx.Mul"(%18001, %18001) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18003 = torch.operator "onnx.Mul"(%18001, %18002) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18004 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_act_mlp_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %18005 = torch.operator "onnx.Mul"(%18004, %18003) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18006 = torch.operator "onnx.Add"(%18001, %18005) : (!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18007 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_act_mlp_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %18008 = torch.operator "onnx.Mul"(%18007, %18006) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18009 = torch.operator "onnx.Tanh"(%18008) : (!torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18010 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_act_mlp_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %18011 = torch.operator "onnx.Add"(%18010, %18009) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18012 = torch.operator "onnx.Mul"(%18001, %18011) : 
(!torch.vtensor<[?,4608,12288],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18013 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_act_mlp_Constant_3_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %18014 = torch.operator "onnx.Mul"(%18013, %18012) : (!torch.vtensor<[],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,12288],bf16> %18015 = torch.operator "onnx.Shape"(%17999) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %18016 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18017 = torch.operator "onnx.Gather"(%18015, %18016) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18018 = torch.operator "onnx.MatMul"(%17999, %1155) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18019 = torch.operator "onnx.Add"(%733, %18018) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18020 = torch.operator "onnx.MatMul"(%17999, %1156) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18021 = torch.operator "onnx.Add"(%734, %18020) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18022 = torch.operator "onnx.MatMul"(%17999, %1157) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[3072,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18023 = torch.operator "onnx.Add"(%735, %18022) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18024 = torch.operator "onnx.Shape"(%18021) : (!torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[3],si64> %18025 = torch.operator 
"onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18026 = torch.operator "onnx.Gather"(%18024, %18025) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18027 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18028 = torch.operator "onnx.Div"(%18026, %18027) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18029 = torch.operator "onnx.Cast"(%18028) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18030 = torch.operator "onnx.Cast"(%18029) {torch.onnx.to = 7 : si64} : (!torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18031 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22573_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18032 = torch.operator "onnx.Unsqueeze"(%18017, %18031) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18033 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18034 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18035 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22577_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18036 = torch.operator "onnx.Unsqueeze"(%18030, %18035) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18037 = torch.operator "onnx.Concat"(%18032, %18033, %18034, %18036) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %18038 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22580_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18039 = torch.operator "onnx.Unsqueeze"(%18017, %18038) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18040 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18041 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18042 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22584_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18043 = torch.operator "onnx.Unsqueeze"(%18030, %18042) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18044 = torch.operator "onnx.Concat"(%18039, %18040, %18041, %18043) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %18045 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22587_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18046 = torch.operator "onnx.Unsqueeze"(%18017, %18045) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18047 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18048 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18049 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22591_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18050 = torch.operator "onnx.Unsqueeze"(%18030, %18049) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18051 = torch.operator "onnx.Concat"(%18046, %18047, %18048, %18050) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %18052 = torch.operator "onnx.Reshape"(%18019, %18037) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18053 = torch.operator "onnx.Transpose"(%18052) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %18054 = torch.operator "onnx.Reshape"(%18021, %18044) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18055 = torch.operator "onnx.Transpose"(%18054) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %18056 = torch.operator "onnx.Reshape"(%18023, %18051) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18057 = torch.operator "onnx.Transpose"(%18056) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %18058 = torch.operator "onnx.Cast"(%18053) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %18059 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_norm_q_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %18060 = torch.operator "onnx.Pow"(%18058, %18059) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %18061 = torch.operator "onnx.ReduceMean"(%18060) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %18062 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_norm_q_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %18063 = torch.operator "onnx.Add"(%18061, %18062) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %18064 = torch.operator "onnx.Sqrt"(%18063) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %18065 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_norm_q_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %18066 = torch.operator "onnx.Div"(%18065, %18064) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %18067 = torch.operator "onnx.Cast"(%18053) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %18068 = torch.operator "onnx.Mul"(%18067, %18066) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %18069 = torch.operator "onnx.Cast"(%18068) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %18070 = torch.operator "onnx.Mul"(%18069, %731) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %18071 = torch.operator "onnx.Cast"(%18055) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %18072 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_norm_k_Constant_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %18073 = torch.operator "onnx.Pow"(%18071, %18072) : 
(!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,?],f32> %18074 = torch.operator "onnx.ReduceMean"(%18073) {torch.onnx.axes = [-1 : si64], torch.onnx.keepdims = 1 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,1],f32> %18075 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_norm_k_Constant_1_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %18076 = torch.operator "onnx.Add"(%18074, %18075) : (!torch.vtensor<[?,?,?,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[?,?,?,1],f32> %18077 = torch.operator "onnx.Sqrt"(%18076) : (!torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %18078 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_norm_k_Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],f32> %18079 = torch.operator "onnx.Div"(%18078, %18077) : (!torch.vtensor<[],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,1],f32> %18080 = torch.operator "onnx.Cast"(%18055) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %18081 = torch.operator "onnx.Mul"(%18080, %18079) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,1],f32>) -> !torch.vtensor<[?,?,?,?],f32> %18082 = torch.operator "onnx.Cast"(%18081) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?,?],f32>) -> !torch.vtensor<[?,?,?,?],bf16> %18083 = torch.operator "onnx.Mul"(%18082, %732) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[128],bf16>) -> !torch.vtensor<[?,?,?,128],bf16> %18084 = torch.operator "onnx.Shape"(%18070) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %18085 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_9_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18086 = torch.operator "onnx.Gather"(%18084, %18085) {torch.onnx.axis = 0 : si64} : 
(!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18087 = torch.operator "onnx.Shape"(%18070) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %18088 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_10_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18089 = torch.operator "onnx.Gather"(%18087, %18088) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18090 = torch.operator "onnx.Shape"(%18070) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %18091 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_11_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18092 = torch.operator "onnx.Gather"(%18090, %18091) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18093 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22635_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18094 = torch.operator "onnx.Unsqueeze"(%18086, %18093) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18095 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22637_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18096 = torch.operator "onnx.Unsqueeze"(%18089, %18095) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18097 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22639_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18098 = torch.operator "onnx.Unsqueeze"(%18092, %18097) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18099 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.37_attn_Constant_12_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18100 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_13_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18101 = torch.operator "onnx.Concat"(%18094, %18096, %18098, %18099, %18100) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %18102 = torch.operator "onnx.Reshape"(%18070, %18101) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %18103 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_14_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %18104:2 = torch.operator "onnx.Split"(%18102, %18103) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %18105 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_15_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18106 = torch.operator "onnx.Squeeze"(%18104#0, %18105) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18107 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_16_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18108 = torch.operator "onnx.Squeeze"(%18104#1, %18107) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18109 = torch.operator "onnx.Neg"(%18108) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %18110 = 
torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_17_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18111 = torch.operator "onnx.Unsqueeze"(%18109, %18110) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %18112 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_18_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18113 = torch.operator "onnx.Unsqueeze"(%18106, %18112) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %18114 = torch.operator "onnx.Concat"(%18111, %18113) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %18115 = torch.operator "onnx.Shape"(%18114) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %18116 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_19_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18117 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_20_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18118 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_21_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18119 = torch.operator "onnx.Slice"(%18115, %18117, %18118, %18116) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %18120 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_22_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18121 = torch.operator "onnx.Concat"(%18119, %18120) 
{torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %18122 = torch.operator "onnx.Reshape"(%18114, %18121) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18123 = torch.operator "onnx.Cast"(%18070) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %18124 = torch.operator "onnx.Mul"(%18123, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %18125 = torch.operator "onnx.Cast"(%18122) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %18126 = torch.operator "onnx.Mul"(%18125, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %18127 = torch.operator "onnx.Add"(%18124, %18126) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %18128 = torch.operator "onnx.Cast"(%18127) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %18129 = torch.operator "onnx.Shape"(%18083) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %18130 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_23_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18131 = torch.operator "onnx.Gather"(%18129, %18130) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18132 = torch.operator "onnx.Shape"(%18083) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %18133 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_24_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18134 = torch.operator 
"onnx.Gather"(%18132, %18133) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18135 = torch.operator "onnx.Shape"(%18083) : (!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[4],si64> %18136 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_25_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18137 = torch.operator "onnx.Gather"(%18135, %18136) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[4],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18138 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22680_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18139 = torch.operator "onnx.Unsqueeze"(%18131, %18138) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18140 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22682_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18141 = torch.operator "onnx.Unsqueeze"(%18134, %18140) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18142 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22684_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18143 = torch.operator "onnx.Unsqueeze"(%18137, %18142) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18144 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_26_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18145 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_27_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18146 = torch.operator "onnx.Concat"(%18139, %18141, %18143, %18144, %18145) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[5],si64> %18147 = torch.operator "onnx.Reshape"(%18083, %18146) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,128],bf16>, !torch.vtensor<[5],si64>) -> !torch.vtensor<[?,?,?,?,2],bf16> %18148 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_28_attr__value> : tensor<2xsi64>} : () -> !torch.vtensor<[2],si64> %18149:2 = torch.operator "onnx.Split"(%18147, %18148) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[2],si64>) -> (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) %18150 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_29_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18151 = torch.operator "onnx.Squeeze"(%18149#0, %18150) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18152 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_30_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18153 = torch.operator "onnx.Squeeze"(%18149#1, %18152) : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18154 = torch.operator "onnx.Neg"(%18153) : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],bf16> %18155 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_31_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18156 = torch.operator "onnx.Unsqueeze"(%18154, %18155) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %18157 = torch.operator "onnx.Constant"() {torch.onnx.value = 
dense_resource<__single_transformer_blocks.37_attn_Constant_32_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18158 = torch.operator "onnx.Unsqueeze"(%18151, %18157) : (!torch.vtensor<[?,?,?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?,?,1],bf16> %18159 = torch.operator "onnx.Concat"(%18156, %18158) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,?,?,1],bf16>, !torch.vtensor<[?,?,?,?,1],bf16>) -> !torch.vtensor<[?,?,?,?,2],bf16> %18160 = torch.operator "onnx.Shape"(%18159) : (!torch.vtensor<[?,?,?,?,2],bf16>) -> !torch.vtensor<[5],si64> %18161 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_33_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18162 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_34_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18163 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_35_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18164 = torch.operator "onnx.Slice"(%18160, %18162, %18163, %18161) : (!torch.vtensor<[5],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %18165 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_36_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18166 = torch.operator "onnx.Concat"(%18164, %18165) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[4],si64> %18167 = torch.operator "onnx.Reshape"(%18159, %18166) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,?,?,?,2],bf16>, !torch.vtensor<[4],si64>) -> !torch.vtensor<[?,?,?,?],bf16> %18168 = torch.operator "onnx.Cast"(%18083) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[?,?,?,128],bf16>) -> !torch.vtensor<[?,?,?,128],f32> %18169 = torch.operator "onnx.Mul"(%18168, %1455) : (!torch.vtensor<[?,?,?,128],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %18170 = torch.operator "onnx.Cast"(%18167) {torch.onnx.to = 1 : si64} : (!torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,?,?],f32> %18171 = torch.operator "onnx.Mul"(%18170, %1456) : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[1,1,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %18172 = torch.operator "onnx.Add"(%18169, %18171) : (!torch.vtensor<[?,?,4608,128],f32>, !torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],f32> %18173 = torch.operator "onnx.Cast"(%18172) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,4608,128],f32>) -> !torch.vtensor<[?,?,4608,128],bf16> %18174 = torch.operator "onnx.Shape"(%18128) : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[4],si64> %18175 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_37_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18176 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_38_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18177 = torch.operator "onnx.Slice"(%18174, %18175, %18176) : (!torch.vtensor<[4],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18178 = torch.operator "onnx.Cast"(%18177) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],si64>) -> !torch.vtensor<[1],bf16> %18179 = torch.operator "onnx.Sqrt"(%18178) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %18180 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_39_attr__value> : tensor<1xf32>} : () -> !torch.vtensor<[1],f32> %18181 = torch.operator "onnx.Cast"(%18179) {torch.onnx.to = 1 : si64} : 
(!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],f32> %18182 = torch.operator "onnx.Div"(%18180, %18181) : (!torch.vtensor<[1],f32>, !torch.vtensor<[1],f32>) -> !torch.vtensor<[1],f32> %18183 = torch.operator "onnx.Cast"(%18182) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[1],f32>) -> !torch.vtensor<[1],bf16> %18184 = torch.operator "onnx.Transpose"(%18173) {torch.onnx.perm = [0 : si64, 1 : si64, 3 : si64, 2 : si64]} : (!torch.vtensor<[?,?,4608,128],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %18185 = torch.operator "onnx.Sqrt"(%18183) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %18186 = torch.operator "onnx.Mul"(%18128, %18185) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,4608,128],bf16> %18187 = torch.operator "onnx.Sqrt"(%18183) : (!torch.vtensor<[1],bf16>) -> !torch.vtensor<[1],bf16> %18188 = torch.operator "onnx.Mul"(%18184, %18187) : (!torch.vtensor<[?,?,128,4608],bf16>, !torch.vtensor<[1],bf16>) -> !torch.vtensor<[?,?,128,4608],bf16> %18189 = torch.operator "onnx.MatMul"(%18186, %18188) : (!torch.vtensor<[?,?,4608,128],bf16>, !torch.vtensor<[?,?,128,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %18190 = torch.operator "onnx.Softmax"(%18189) {torch.onnx.axis = -1 : si64} : (!torch.vtensor<[?,?,4608,4608],bf16>) -> !torch.vtensor<[?,?,4608,4608],bf16> %18191 = torch.operator "onnx.MatMul"(%18190, %18057) : (!torch.vtensor<[?,?,4608,4608],bf16>, !torch.vtensor<[?,?,?,?],bf16>) -> !torch.vtensor<[?,?,4608,?],bf16> %18192 = torch.operator "onnx.Transpose"(%18191) {torch.onnx.perm = [0 : si64, 2 : si64, 1 : si64, 3 : si64]} : (!torch.vtensor<[?,?,4608,?],bf16>) -> !torch.vtensor<[?,4608,?,?],bf16> %18193 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_40_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18194 = torch.operator "onnx.Mul"(%18030, %18193) : (!torch.vtensor<[],si64>, !torch.vtensor<[],si64>) -> 
!torch.vtensor<[],si64> %18195 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22737_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18196 = torch.operator "onnx.Unsqueeze"(%18017, %18195) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18197 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_attn_Constant_41_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18198 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<_Constant_22740_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18199 = torch.operator "onnx.Unsqueeze"(%18194, %18198) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18200 = torch.operator "onnx.Concat"(%18196, %18197, %18199) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[3],si64> %18201 = torch.operator "onnx.Reshape"(%18192, %18200) {torch.onnx.allowzero = 0 : si64} : (!torch.vtensor<[?,4608,?,?],bf16>, !torch.vtensor<[3],si64>) -> !torch.vtensor<[?,?,?],bf16> %18202 = torch.operator "onnx.Cast"(%18201) {torch.onnx.to = 16 : si64} : (!torch.vtensor<[?,?,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %18203 = torch.operator "onnx.Concat"(%18202, %18014) {torch.onnx.axis = 2 : si64} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,4608,12288],bf16>) -> !torch.vtensor<[?,4608,?],bf16> %18204 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__single_transformer_blocks.37_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18205 = torch.operator "onnx.Unsqueeze"(%17988, %18204) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %18206 = torch.operator "onnx.MatMul"(%18203, %1158) : (!torch.vtensor<[?,4608,?],bf16>, !torch.vtensor<[15360,3072],bf16>) -> 
!torch.vtensor<[?,4608,3072],bf16> %18207 = torch.operator "onnx.Add"(%730, %18206) : (!torch.vtensor<[3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18208 = torch.operator "onnx.Mul"(%18205, %18207) : (!torch.vtensor<[?,1,?],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18209 = torch.operator "onnx.Add"(%17970, %18208) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[?,4608,3072],bf16>) -> !torch.vtensor<[?,4608,3072],bf16> %18210 = torch.operator "onnx.Shape"(%9126) : (!torch.vtensor<[?,512,3072],bf16>) -> !torch.vtensor<[3],si64> %18211 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__Constant_2_attr__value> : tensor} : () -> !torch.vtensor<[],si64> %18212 = torch.operator "onnx.Gather"(%18210, %18211) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[3],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[],si64> %18213 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18214 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18215 = torch.operator "onnx.Unsqueeze"(%18212, %18214) : (!torch.vtensor<[],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18216 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18217 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__Constant_6_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18218 = torch.operator "onnx.Slice"(%18209, %18215, %18216, %18213, %18217) : (!torch.vtensor<[?,4608,3072],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?,?],bf16> %18219 = torch.operator "onnx.Cast"(%1285) {torch.onnx.to = 
16 : si64} : (!torch.vtensor<[1,3072],bf16>) -> !torch.vtensor<[1,3072],bf16> %18220 = torch.operator "onnx.Gemm"(%18219, %736, %737) {torch.onnx.alpha = 1.000000e+00 : f32, torch.onnx.beta = 1.000000e+00 : f32, torch.onnx.transB = 1 : si64} : (!torch.vtensor<[1,3072],bf16>, !torch.vtensor<[6144,3072],bf16>, !torch.vtensor<[6144],bf16>) -> !torch.vtensor<[1,6144],bf16> %18221 = torch.operator "onnx.Shape"(%18220) : (!torch.vtensor<[1,6144],bf16>) -> !torch.vtensor<[2],si64> %18222 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18223 = torch.operator "onnx.Gather"(%18221, %18222) {torch.onnx.axis = 0 : si64} : (!torch.vtensor<[2],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18224 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_1_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18225 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_2_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18226 = torch.operator "onnx.Add"(%18223, %18225) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18227 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_3_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18228 = torch.operator "onnx.Div"(%18226, %18227) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18229 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_4_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18230 = torch.operator "onnx.Mul"(%18228, %18229) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18231 = torch.operator "onnx.Slice"(%18220, %18224, %18230, %18222) : (!torch.vtensor<[1,6144],bf16>, !torch.vtensor<[1],si64>, 
!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %18232 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_5_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18233 = torch.operator "onnx.Mul"(%18228, %18232) : (!torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[1],si64> %18234 = torch.operator "onnx.Slice"(%18220, %18230, %18233, %18222) : (!torch.vtensor<[1,6144],bf16>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,?],bf16> %_2Fnorm_out2Fnorm2FConstant_attr__value = util.global.load @"/norm_out/norm/Constant_attr__value" : tensor<3072xbf16> %18235 = torch_c.from_builtin_tensor %_2Fnorm_out2Fnorm2FConstant_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %_2Fnorm_out2Fnorm2FConstant_1_attr__value = util.global.load @"/norm_out/norm/Constant_1_attr__value" : tensor<3072xbf16> %18236 = torch_c.from_builtin_tensor %_2Fnorm_out2Fnorm2FConstant_1_attr__value : tensor<3072xbf16> -> !torch.vtensor<[3072],bf16> %18237 = torch.operator "onnx.LayerNormalization"(%18218, %18235, %18236) {torch.onnx.axis = -1 : si64, torch.onnx.epsilon = 9.99999997E-7 : f32} : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072],bf16>, !torch.vtensor<[3072],bf16>) -> !torch.vtensor<[?,?,?],bf16> %18238 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_6_attr__value> : tensor} : () -> !torch.vtensor<[],bf16> %18239 = torch.operator "onnx.Add"(%18231, %18238) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[],bf16>) -> !torch.vtensor<[?,?],bf16> %18240 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_7_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18241 = torch.operator "onnx.Unsqueeze"(%18239, %18240) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %18242 = torch.operator 
"onnx.Mul"(%18237, %18241) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %18243 = torch.operator "onnx.Constant"() {torch.onnx.value = dense_resource<__norm_out_Constant_8_attr__value> : tensor<1xsi64>} : () -> !torch.vtensor<[1],si64> %18244 = torch.operator "onnx.Unsqueeze"(%18234, %18243) : (!torch.vtensor<[?,?],bf16>, !torch.vtensor<[1],si64>) -> !torch.vtensor<[?,1,?],bf16> %18245 = torch.operator "onnx.Add"(%18242, %18244) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[?,1,?],bf16>) -> !torch.vtensor<[?,?,?],bf16> %18246 = torch.operator "onnx.MatMul"(%18245, %1159) : (!torch.vtensor<[?,?,?],bf16>, !torch.vtensor<[3072,64],bf16>) -> !torch.vtensor<[?,?,64],bf16> %18247 = torch.operator "onnx.Add"(%738, %18246) : (!torch.vtensor<[64],bf16>, !torch.vtensor<[?,?,64],bf16>) -> !torch.vtensor<[?,?,64],bf16> return %18247 : !torch.vtensor<[?,?,64],bf16> } } {-# dialect_resources: { builtin: { _proj_out.bias: "0x080000005F3BDA3B93BA0DBB023D013D053D053DBABCAABCB1BCB7BC18BD0FBDFBBC1ABD123D293D163D1B3D3ABCE8BB0CBC62BBF63CEC3CDA3CDE3CB9BA70BBF3BAA1BBDBBCE8BCBFBCD1BCC6BCB7BCB3BCD5BCF4BC07BD03BD02BDD1BCD7BCBBBCD5BC09BC0FBC3FBC22BCEF3CCE3CD83CC93C883D913D8D3D903D593D4A3D503D453D", __Constant_attr__value: "0x080000007A44", __Constant_1_attr__value: "0x080000007A44", __time_text_embed_time_proj_Constant_attr__value: "0x080000000000000000000000", __time_text_embed_time_proj_Constant_1_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_Constant_2_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_Constant_4_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_Constant_5_attr__value: "0x080000008000000000000000", __time_text_embed_time_proj_Constant_6_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __time_text_embed_time_proj_Constant_7_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_Constant_8_attr__value: "0x080000000100000000000000", 
__time_text_embed_time_proj_Constant_9_attr__value: "0x080000000000000000000000", __time_text_embed_time_proj_Constant_10_attr__value: "0x080000008000000000000000", __time_text_embed_time_proj_Constant_11_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_1_Constant_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_1_Constant_2_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_1_Constant_3_attr__value: "0x080000008000000000000000", __time_text_embed_time_proj_1_Constant_4_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __time_text_embed_time_proj_1_Constant_5_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_1_Constant_6_attr__value: "0x080000000100000000000000", __time_text_embed_time_proj_1_Constant_7_attr__value: "0x080000000000000000000000", __time_text_embed_time_proj_1_Constant_8_attr__value: "0x080000008000000000000000", __time_text_embed_time_proj_1_Constant_9_attr__value: "0x080000000100000000000000", __pos_embed_Constant_attr__value: "0x080000000000803F9BE8A13ECDCCCC3DE286013D0AD7233C373E4F3B6F12833A5FCBA539", __pos_embed_Constant_1_attr__value: "0x080000000200000000000000", __pos_embed_Constant_2_attr__value: "0x080000000200000000000000", __pos_embed_Constant_3_attr__value: "0x08000000010000000000000001000000000000000200000000000000", __pos_embed_Constant_4_attr__value: "0x080000000200000000000000", __pos_embed_Constant_5_attr__value: "0x08000000010000000000000001000000000000000200000000000000", __pos_embed_Constant_6_attr__value: "0x080000000000803F523D383F3598043F51DABE3EA15A893E15B4453ECA480E3ECDCCCC3D4264933DBA26543DA7AE183D35C4DB3CAB299E3CAAA7633C0AD7233C9CD3EB3B95B8A93BA54A743B2BD02F3B770FFD3ABB1FB63A6F12833A4AA93C3ADEC6073AEA6EC33989A68C39C6724A39FCB21139", __pos_embed_Constant_7_attr__value: "0x080000000200000000000000", __pos_embed_Constant_8_attr__value: "0x08000000010000000000000001000000000000000200000000000000", __pos_embed_Constant_9_attr__value: 
"0x080000000200000000000000", __pos_embed_Constant_10_attr__value: "0x08000000010000000000000001000000000000000200000000000000", __pos_embed_Constant_11_attr__value: "0x080000000000803F523D383F3598043F51DABE3EA15A893E15B4453ECA480E3ECDCCCC3D4264933DBA26543DA7AE183D35C4DB3CAB299E3CAAA7633C0AD7233C9CD3EB3B95B8A93BA54A743B2BD02F3B770FFD3ABB1FB63A6F12833A4AA93C3ADEC6073AEA6EC33989A68C39C6724A39FCB21139", __pos_embed_Constant_12_attr__value: "0x080000000200000000000000", __pos_embed_Constant_13_attr__value: "0x08000000010000000000000001000000000000000200000000000000", __pos_embed_Constant_14_attr__value: "0x080000000200000000000000", __pos_embed_Constant_15_attr__value: "0x08000000010000000000000001000000000000000200000000000000", __transformer_blocks.0_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.0_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.0_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.0_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.0_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.0_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.0_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.0_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.0_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.0_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.0_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.0_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.0_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.0_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.0_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", 
__transformer_blocks.0_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.0_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.0_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.0_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.0_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.0_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.0_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.0_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.0_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.0_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.0_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.0_attn_Constant_attr__value: "0x080000000100000000000000FFFFFFFFFFFFFFFF18000000000000008000000000000000", __transformer_blocks.0_attn_Constant_1_attr__value: "0x080000000100000000000000FFFFFFFFFFFFFFFF18000000000000008000000000000000", __transformer_blocks.0_attn_Constant_2_attr__value: "0x080000000100000000000000FFFFFFFFFFFFFFFF18000000000000008000000000000000", __transformer_blocks.0_attn_Constant_3_attr__value: "0x080000000100000000000000FFFFFFFFFFFFFFFF18000000000000008000000000000000", __transformer_blocks.0_attn_Constant_4_attr__value: "0x080000000100000000000000FFFFFFFFFFFFFFFF18000000000000008000000000000000", __transformer_blocks.0_attn_Constant_5_attr__value: "0x080000000100000000000000FFFFFFFFFFFFFFFF18000000000000008000000000000000", __transformer_blocks.0_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.0_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.0_attn_norm_q_Constant_2_attr__value: 
"0x080000000000803F", __transformer_blocks.0_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.0_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.0_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.0_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.0_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.0_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.0_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.0_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.0_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.0_attn_Constant_6_attr__value: "0x080000000000000000000000", __transformer_blocks.0_attn_Constant_7_attr__value: "0x080000000100000000000000", __transformer_blocks.0_attn_Constant_8_attr__value: "0x080000000000000000000000", __transformer_blocks.0_attn_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.0_attn_Constant_10_attr__value: "0x08000000010000000000000018000000000000000012000000000000FFFFFFFFFFFFFFFF0200000000000000", __transformer_blocks.0_attn_Constant_11_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.0_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_14_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_16_attr__value: "0x080000000000000000000000", __transformer_blocks.0_attn_Constant_17_attr__value: "0x080000000000000000000000", __transformer_blocks.0_attn_Constant_18_attr__value: "0x080000000300000000000000", __transformer_blocks.0_attn_Constant_19_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_20_attr__value: "0x08000000010000000000000018000000000000000012000000000000FFFFFFFFFFFFFFFF0200000000000000", __transformer_blocks.0_attn_Constant_21_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.0_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_25_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.0_attn_Constant_27_attr__value: "0x080000000000000000000000", __transformer_blocks.0_attn_Constant_28_attr__value: "0x080000000300000000000000", __transformer_blocks.0_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.0_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.0_attn_Constant_32_attr__value: "0x080000000000803F", __transformer_blocks.0_attn_Constant_33_attr__value: "0x080000000100000000000000FFFFFFFFFFFFFFFF000C000000000000", __transformer_blocks.0_attn_Constant_34_attr__value: "0x080000000100000000000000", __transformer_blocks.0_attn_Constant_35_attr__value: "0x080000000000000000000000", __transformer_blocks.0_attn_Constant_36_attr__value: "0x080000000002000000000000", __transformer_blocks.0_attn_Constant_37_attr__value: "0x080000000100000000000000", __transformer_blocks.0_attn_Constant_38_attr__value: "0x080000000100000000000000", __transformer_blocks.0_attn_Constant_39_attr__value: "0x080000000002000000000000", __transformer_blocks.0_attn_Constant_40_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.0_attn_Constant_41_attr__value: "0x080000000100000000000000", 
__transformer_blocks.0_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.0_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.0_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.0_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.0_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.0_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.0_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.0_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.0_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.0_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.0_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.0_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.0_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.0_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.0_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.0_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.0_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.1_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.1_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.1_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.1_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.1_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.1_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.1_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.1_norm1_Constant_7_attr__value: 
"0x080000000400000000000000", __transformer_blocks.1_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.1_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.1_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.1_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.1_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.1_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.1_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.1_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.1_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.1_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.1_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.1_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.1_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.1_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.1_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.1_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.1_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.1_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.1_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.1_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_6255_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.1_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_6259_attr__value: "0x080000000000000000000000", _Constant_6262_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_6266_attr__value: "0x080000000000000000000000", _Constant_6269_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_6273_attr__value: "0x080000000000000000000000", _Constant_6276_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_6280_attr__value: "0x080000000000000000000000", _Constant_6283_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_6287_attr__value: "0x080000000000000000000000", _Constant_6290_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_6294_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.1_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.1_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.1_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.1_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", 
__transformer_blocks.1_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.1_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.1_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.1_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.1_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.1_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.1_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.1_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.1_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_6379_attr__value: "0x080000000000000000000000", _Constant_6381_attr__value: "0x080000000000000000000000", _Constant_6383_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.1_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.1_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.1_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.1_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.1_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_6424_attr__value: "0x080000000000000000000000", _Constant_6426_attr__value: "0x080000000000000000000000", _Constant_6428_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.1_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.1_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.1_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.1_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.1_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.1_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_6481_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_6484_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_48_attr__value: "0x080000000100000000000000", 
__transformer_blocks.1_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.1_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.1_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.1_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.1_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.1_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.1_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.1_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.1_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.1_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.1_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.1_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.1_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.1_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.1_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.1_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.1_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.1_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.1_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.1_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.1_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.1_Constant_9_attr__value: 
"0x080000000100000000000000", __transformer_blocks.2_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.2_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.2_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.2_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.2_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.2_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.2_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.2_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.2_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.2_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.2_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.2_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.2_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.2_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.2_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.2_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.2_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.2_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.2_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.2_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.2_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.2_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.2_norm1_context_Constant_9_attr__value: 
"0x080000000600000000000000", __transformer_blocks.2_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.2_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.2_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.2_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.2_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_6672_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_6676_attr__value: "0x080000000000000000000000", _Constant_6679_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_6683_attr__value: "0x080000000000000000000000", _Constant_6686_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_6690_attr__value: "0x080000000000000000000000", _Constant_6693_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_6697_attr__value: "0x080000000000000000000000", _Constant_6700_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_6704_attr__value: "0x080000000000000000000000", _Constant_6707_attr__value: "0x080000000000000000000000", 
__transformer_blocks.2_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_6711_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.2_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.2_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.2_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.2_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.2_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.2_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.2_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.2_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.2_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.2_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.2_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.2_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.2_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_6796_attr__value: "0x080000000000000000000000", _Constant_6798_attr__value: "0x080000000000000000000000", _Constant_6800_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.2_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.2_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.2_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.2_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.2_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_6841_attr__value: "0x080000000000000000000000", _Constant_6843_attr__value: "0x080000000000000000000000", _Constant_6845_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.2_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.2_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.2_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_43_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.2_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.2_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.2_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_6898_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_6901_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.2_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.2_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.2_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.2_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.2_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.2_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.2_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.2_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.2_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.2_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.2_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.2_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.2_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.2_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.2_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.2_Constant_5_attr__value: "0x080000000100000000000000", 
__transformer_blocks.2_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.2_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.2_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.2_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.2_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.2_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.2_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.2_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.3_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.3_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.3_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.3_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.3_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.3_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.3_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.3_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.3_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.3_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.3_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.3_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.3_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.3_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.3_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.3_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", 
__transformer_blocks.3_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.3_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.3_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.3_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.3_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.3_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.3_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.3_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.3_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.3_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.3_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.3_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_7089_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_7093_attr__value: "0x080000000000000000000000", _Constant_7096_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_7100_attr__value: "0x080000000000000000000000", _Constant_7103_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_7107_attr__value: "0x080000000000000000000000", 
_Constant_7110_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_7114_attr__value: "0x080000000000000000000000", _Constant_7117_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_7121_attr__value: "0x080000000000000000000000", _Constant_7124_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_7128_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.3_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.3_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.3_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.3_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.3_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.3_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.3_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.3_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.3_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.3_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.3_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.3_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_16_attr__value: 
"0x080000000100000000000000", __transformer_blocks.3_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_7213_attr__value: "0x080000000000000000000000", _Constant_7215_attr__value: "0x080000000000000000000000", _Constant_7217_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.3_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.3_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.3_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.3_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_7258_attr__value: "0x080000000000000000000000", _Constant_7260_attr__value: "0x080000000000000000000000", _Constant_7262_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.3_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.3_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.3_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.3_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.3_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.3_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.3_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_7315_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_7318_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.3_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.3_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.3_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.3_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.3_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.3_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.3_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.3_Constant_1_attr__value: 
"0x080000000100000000000000", __transformer_blocks.3_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.3_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.3_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.3_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.3_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.3_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.3_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.3_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.3_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.3_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.3_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.3_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.3_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.3_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.3_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.3_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.4_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.4_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.4_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.4_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.4_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.4_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.4_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.4_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.4_norm1_Constant_8_attr__value: "0x080000000500000000000000", 
__transformer_blocks.4_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.4_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.4_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.4_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.4_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.4_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.4_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.4_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.4_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.4_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.4_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.4_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.4_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.4_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.4_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.4_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.4_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.4_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.4_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_7506_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_7510_attr__value: 
"0x080000000000000000000000", _Constant_7513_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_7517_attr__value: "0x080000000000000000000000", _Constant_7520_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_7524_attr__value: "0x080000000000000000000000", _Constant_7527_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_7531_attr__value: "0x080000000000000000000000", _Constant_7534_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_7538_attr__value: "0x080000000000000000000000", _Constant_7541_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_7545_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.4_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.4_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.4_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.4_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.4_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.4_attn_norm_added_q_Constant_attr__value: 
"0x0800000000000040", __transformer_blocks.4_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.4_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.4_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.4_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.4_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.4_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.4_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_7630_attr__value: "0x080000000000000000000000", _Constant_7632_attr__value: "0x080000000000000000000000", _Constant_7634_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.4_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.4_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.4_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_30_attr__value: "0x080000000100000000000000", 
__transformer_blocks.4_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_7675_attr__value: "0x080000000000000000000000", _Constant_7677_attr__value: "0x080000000000000000000000", _Constant_7679_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.4_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.4_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.4_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.4_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.4_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.4_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_7732_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_7735_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.4_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.4_attn_Constant_50_attr__value: "0x080000000000000000000000", 
__transformer_blocks.4_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.4_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.4_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.4_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.4_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.4_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.4_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.4_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.4_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.4_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.4_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.4_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.4_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.4_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.4_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.4_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.4_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.4_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.4_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.4_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.4_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.4_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.4_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.5_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.5_norm1_Constant_1_attr__value: 
"0x080000000000000000000000", __transformer_blocks.5_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.5_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.5_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.5_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.5_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.5_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.5_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.5_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.5_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.5_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.5_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.5_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.5_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.5_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.5_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.5_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.5_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.5_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.5_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.5_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.5_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.5_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.5_norm1_context_Constant_11_attr__value: 
"0x08000000803F", __transformer_blocks.5_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.5_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.5_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_7923_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_7927_attr__value: "0x080000000000000000000000", _Constant_7930_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_7934_attr__value: "0x080000000000000000000000", _Constant_7937_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_7941_attr__value: "0x080000000000000000000000", _Constant_7944_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_7948_attr__value: "0x080000000000000000000000", _Constant_7951_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_7955_attr__value: "0x080000000000000000000000", _Constant_7958_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_14_attr__value: "0x080000001800000000000000", 
_Constant_7962_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.5_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.5_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.5_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.5_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.5_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.5_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.5_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.5_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.5_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.5_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.5_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.5_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.5_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_8047_attr__value: "0x080000000000000000000000", _Constant_8049_attr__value: "0x080000000000000000000000", _Constant_8051_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.5_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.5_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.5_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.5_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.5_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_8092_attr__value: "0x080000000000000000000000", _Constant_8094_attr__value: "0x080000000000000000000000", _Constant_8096_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.5_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.5_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.5_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.5_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.5_attn_Constant_45_attr__value: 
"0x080000000000803F", __transformer_blocks.5_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_8149_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_8152_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.5_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.5_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.5_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.5_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.5_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.5_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.5_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.5_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.5_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.5_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.5_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.5_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.5_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.5_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.5_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.5_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.5_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.5_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.5_Constant_8_attr__value: 
"0x080000000100000000000000", __transformer_blocks.5_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.5_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.5_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.5_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.5_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.6_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.6_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.6_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.6_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.6_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.6_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.6_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.6_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.6_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.6_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.6_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.6_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.6_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.6_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.6_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.6_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.6_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.6_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", 
__transformer_blocks.6_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.6_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.6_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.6_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.6_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.6_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.6_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.6_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.6_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.6_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_8340_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_8344_attr__value: "0x080000000000000000000000", _Constant_8347_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_8351_attr__value: "0x080000000000000000000000", _Constant_8354_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_8358_attr__value: "0x080000000000000000000000", _Constant_8361_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_10_attr__value: 
"0x080000001800000000000000", _Constant_8365_attr__value: "0x080000000000000000000000", _Constant_8368_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_8372_attr__value: "0x080000000000000000000000", _Constant_8375_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_8379_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.6_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.6_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.6_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.6_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.6_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.6_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.6_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.6_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.6_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.6_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.6_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.6_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.6_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_8464_attr__value: "0x080000000000000000000000", _Constant_8466_attr__value: 
"0x080000000000000000000000", _Constant_8468_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.6_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.6_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.6_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.6_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_8509_attr__value: "0x080000000000000000000000", _Constant_8511_attr__value: "0x080000000000000000000000", _Constant_8513_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.6_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.6_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.6_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.6_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.6_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.6_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.6_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_8566_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_8569_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.6_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.6_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.6_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.6_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.6_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.6_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.6_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.6_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.6_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.6_Constant_3_attr__value: "0x080000000100000000000000", 
__transformer_blocks.6_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.6_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.6_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.6_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.6_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.6_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.6_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.6_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.6_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.6_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.6_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.6_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.6_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.6_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.7_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.7_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.7_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.7_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.7_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.7_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.7_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.7_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.7_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.7_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.7_norm1_Constant_10_attr__value: "0x080000000100000000000000", 
__transformer_blocks.7_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.7_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.7_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.7_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.7_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.7_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.7_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.7_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.7_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.7_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.7_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.7_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.7_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.7_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.7_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.7_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.7_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_8757_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_8761_attr__value: "0x080000000000000000000000", _Constant_8764_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.7_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_8768_attr__value: "0x080000000000000000000000", _Constant_8771_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_8775_attr__value: "0x080000000000000000000000", _Constant_8778_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_8782_attr__value: "0x080000000000000000000000", _Constant_8785_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_8789_attr__value: "0x080000000000000000000000", _Constant_8792_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_8796_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.7_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.7_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.7_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.7_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.7_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.7_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.7_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.7_attn_norm_added_q_Constant_2_attr__value: 
"0x080000000000803F", __transformer_blocks.7_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.7_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.7_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.7_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.7_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_8881_attr__value: "0x080000000000000000000000", _Constant_8883_attr__value: "0x080000000000000000000000", _Constant_8885_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.7_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.7_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.7_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.7_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_8926_attr__value: "0x080000000000000000000000", _Constant_8928_attr__value: "0x080000000000000000000000", 
_Constant_8930_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.7_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.7_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.7_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.7_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.7_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.7_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_8983_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_8986_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.7_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.7_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.7_attn_Constant_53_attr__value: 
"0x080000000100000000000000", __transformer_blocks.7_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.7_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.7_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.7_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.7_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.7_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.7_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.7_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.7_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.7_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.7_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.7_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.7_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.7_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.7_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.7_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.7_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.7_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.7_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.7_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.7_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.8_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.8_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.8_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.8_norm1_Constant_3_attr__value: "0x080000000600000000000000", 
__transformer_blocks.8_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.8_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.8_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.8_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.8_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.8_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.8_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.8_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.8_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.8_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.8_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.8_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.8_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.8_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.8_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.8_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.8_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.8_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.8_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.8_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.8_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.8_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.8_attn_Constant_attr__value: "0x080000000000000000000000", 
__transformer_blocks.8_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.8_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_9174_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_9178_attr__value: "0x080000000000000000000000", _Constant_9181_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_9185_attr__value: "0x080000000000000000000000", _Constant_9188_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_9192_attr__value: "0x080000000000000000000000", _Constant_9195_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_9199_attr__value: "0x080000000000000000000000", _Constant_9202_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_9206_attr__value: "0x080000000000000000000000", _Constant_9209_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_9213_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.8_attn_norm_q_Constant_1_attr__value: 
"0x08000000BD378635", __transformer_blocks.8_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.8_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.8_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.8_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.8_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.8_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.8_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.8_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.8_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.8_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.8_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.8_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_9298_attr__value: "0x080000000000000000000000", _Constant_9300_attr__value: "0x080000000000000000000000", _Constant_9302_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.8_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.8_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_25_attr__value: "0x080000000000000000000000", 
__transformer_blocks.8_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.8_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.8_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_9343_attr__value: "0x080000000000000000000000", _Constant_9345_attr__value: "0x080000000000000000000000", _Constant_9347_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.8_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.8_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.8_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.8_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.8_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.8_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_9400_attr__value: "0x080000000000000000000000", 
__transformer_blocks.8_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_9403_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.8_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.8_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.8_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.8_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.8_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.8_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.8_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.8_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.8_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.8_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.8_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.8_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.8_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.8_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.8_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.8_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.8_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.8_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.8_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.8_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.8_ff_context_net.0_Constant_1_attr__value: 
"0x080000004C3F", __transformer_blocks.8_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.8_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.8_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.9_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.9_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.9_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.9_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.9_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.9_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.9_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.9_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.9_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.9_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.9_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.9_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.9_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.9_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.9_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.9_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.9_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.9_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.9_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.9_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", 
__transformer_blocks.9_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.9_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.9_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.9_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.9_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.9_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.9_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.9_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_9591_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_9595_attr__value: "0x080000000000000000000000", _Constant_9598_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_9602_attr__value: "0x080000000000000000000000", _Constant_9605_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_9609_attr__value: "0x080000000000000000000000", _Constant_9612_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_9616_attr__value: "0x080000000000000000000000", _Constant_9619_attr__value: "0x080000000000000000000000", 
__transformer_blocks.9_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_9623_attr__value: "0x080000000000000000000000", _Constant_9626_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_9630_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.9_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.9_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.9_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.9_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.9_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.9_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.9_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.9_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.9_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.9_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.9_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.9_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.9_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_9715_attr__value: "0x080000000000000000000000", _Constant_9717_attr__value: "0x080000000000000000000000", _Constant_9719_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_18_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.9_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.9_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.9_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.9_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_9760_attr__value: "0x080000000000000000000000", _Constant_9762_attr__value: "0x080000000000000000000000", _Constant_9764_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.9_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.9_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_39_attr__value: "0x080000000000000000000000", 
__transformer_blocks.9_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.9_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.9_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.9_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.9_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_9817_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_9820_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.9_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.9_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.9_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.9_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.9_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.9_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.9_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.9_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.9_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.9_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.9_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.9_ff_net.0_Constant_1_attr__value: "0x080000004C3F", 
__transformer_blocks.9_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.9_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.9_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.9_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.9_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.9_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.9_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.9_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.9_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.9_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.9_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.9_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.10_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.10_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.10_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.10_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.10_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.10_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.10_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.10_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.10_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.10_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.10_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.10_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.10_norm1_Constant_12_attr__value: 
"0x080000000100000000000000", __transformer_blocks.10_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.10_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.10_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.10_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.10_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.10_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.10_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.10_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.10_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.10_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.10_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.10_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.10_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.10_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.10_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_10008_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_10012_attr__value: "0x080000000000000000000000", _Constant_10015_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_10019_attr__value: 
"0x080000000000000000000000", _Constant_10022_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_10026_attr__value: "0x080000000000000000000000", _Constant_10029_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_10033_attr__value: "0x080000000000000000000000", _Constant_10036_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_10040_attr__value: "0x080000000000000000000000", _Constant_10043_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_10047_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.10_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.10_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.10_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.10_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.10_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.10_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.10_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.10_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.10_attn_norm_added_k_Constant_attr__value: 
"0x0800000000000040", __transformer_blocks.10_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.10_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.10_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.10_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_10132_attr__value: "0x080000000000000000000000", _Constant_10134_attr__value: "0x080000000000000000000000", _Constant_10136_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.10_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.10_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.10_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.10_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_10177_attr__value: "0x080000000000000000000000", _Constant_10179_attr__value: "0x080000000000000000000000", _Constant_10181_attr__value: "0x080000000000000000000000", 
__transformer_blocks.10_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.10_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.10_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.10_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.10_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.10_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.10_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_10234_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_10237_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.10_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.10_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.10_attn_Constant_53_attr__value: "0x080000000100000000000000", 
__transformer_blocks.10_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.10_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.10_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.10_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.10_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.10_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.10_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.10_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.10_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.10_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.10_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.10_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.10_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.10_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.10_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.10_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.10_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.10_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.10_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.10_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.10_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.11_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.11_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.11_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.11_norm1_Constant_3_attr__value: "0x080000000600000000000000", 
__transformer_blocks.11_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.11_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.11_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.11_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.11_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.11_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.11_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.11_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.11_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.11_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.11_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.11_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.11_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.11_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.11_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.11_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.11_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.11_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.11_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.11_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.11_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.11_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.11_attn_Constant_attr__value: 
"0x080000000000000000000000", __transformer_blocks.11_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.11_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_10425_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_10429_attr__value: "0x080000000000000000000000", _Constant_10432_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_10436_attr__value: "0x080000000000000000000000", _Constant_10439_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_10443_attr__value: "0x080000000000000000000000", _Constant_10446_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_10450_attr__value: "0x080000000000000000000000", _Constant_10453_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_10457_attr__value: "0x080000000000000000000000", _Constant_10460_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_10464_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_norm_q_Constant_attr__value: "0x0800000000000040", 
__transformer_blocks.11_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.11_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.11_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.11_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.11_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.11_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.11_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.11_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.11_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.11_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.11_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.11_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.11_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_10549_attr__value: "0x080000000000000000000000", _Constant_10551_attr__value: "0x080000000000000000000000", _Constant_10553_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.11_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.11_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.11_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.11_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.11_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_10594_attr__value: "0x080000000000000000000000", _Constant_10596_attr__value: "0x080000000000000000000000", _Constant_10598_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.11_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.11_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.11_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.11_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.11_attn_Constant_45_attr__value: "0x080000000000803F", 
__transformer_blocks.11_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_10651_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_10654_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.11_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.11_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.11_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.11_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.11_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.11_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.11_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.11_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.11_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.11_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.11_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.11_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.11_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.11_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.11_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.11_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.11_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.11_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.11_Constant_8_attr__value: 
"0x080000000100000000000000", __transformer_blocks.11_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.11_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.11_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.11_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.11_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.12_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.12_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.12_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.12_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.12_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.12_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.12_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.12_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.12_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.12_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.12_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.12_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.12_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.12_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.12_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.12_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.12_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.12_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", 
__transformer_blocks.12_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.12_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.12_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.12_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.12_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.12_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.12_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.12_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.12_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.12_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_10842_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_10846_attr__value: "0x080000000000000000000000", _Constant_10849_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_10853_attr__value: "0x080000000000000000000000", _Constant_10856_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_10860_attr__value: "0x080000000000000000000000", _Constant_10863_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.12_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_10867_attr__value: "0x080000000000000000000000", _Constant_10870_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_10874_attr__value: "0x080000000000000000000000", _Constant_10877_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_10881_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.12_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.12_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.12_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.12_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.12_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.12_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.12_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.12_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.12_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.12_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.12_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.12_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.12_attn_Constant_17_attr__value: "0x080000000200000000000000", 
_Constant_10966_attr__value: "0x080000000000000000000000", _Constant_10968_attr__value: "0x080000000000000000000000", _Constant_10970_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.12_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.12_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.12_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.12_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_11011_attr__value: "0x080000000000000000000000", _Constant_11013_attr__value: "0x080000000000000000000000", _Constant_11015_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.12_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.12_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.12_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.12_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.12_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.12_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.12_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_11068_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_11071_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.12_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.12_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.12_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.12_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.12_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.12_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.12_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.12_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.12_Constant_2_attr__value: 
"0x08000000803F", __transformer_blocks.12_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.12_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.12_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.12_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.12_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.12_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.12_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.12_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.12_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.12_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.12_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.12_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.12_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.12_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.12_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.13_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.13_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.13_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.13_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.13_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.13_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.13_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.13_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.13_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.13_norm1_Constant_9_attr__value: 
"0x080000000600000000000000", __transformer_blocks.13_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.13_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.13_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.13_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.13_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.13_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.13_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.13_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.13_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.13_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.13_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.13_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.13_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.13_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.13_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.13_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.13_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.13_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_11259_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_11263_attr__value: "0x080000000000000000000000", 
_Constant_11266_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_11270_attr__value: "0x080000000000000000000000", _Constant_11273_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_11277_attr__value: "0x080000000000000000000000", _Constant_11280_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_11284_attr__value: "0x080000000000000000000000", _Constant_11287_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_11291_attr__value: "0x080000000000000000000000", _Constant_11294_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_11298_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.13_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.13_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.13_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.13_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.13_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.13_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", 
__transformer_blocks.13_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.13_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.13_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.13_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.13_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.13_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.13_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_11383_attr__value: "0x080000000000000000000000", _Constant_11385_attr__value: "0x080000000000000000000000", _Constant_11387_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.13_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.13_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.13_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_30_attr__value: "0x080000000100000000000000", 
__transformer_blocks.13_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_11428_attr__value: "0x080000000000000000000000", _Constant_11430_attr__value: "0x080000000000000000000000", _Constant_11432_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.13_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.13_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.13_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.13_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.13_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.13_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_11485_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_11488_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.13_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.13_attn_Constant_50_attr__value: "0x080000000000000000000000", 
__transformer_blocks.13_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.13_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.13_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.13_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.13_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.13_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.13_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.13_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.13_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.13_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.13_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.13_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.13_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.13_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.13_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.13_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.13_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.13_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.13_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.13_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.13_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.13_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.13_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.14_norm1_Constant_attr__value: "0x080000000100000000000000", 
__transformer_blocks.14_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.14_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.14_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.14_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.14_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.14_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.14_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.14_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.14_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.14_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.14_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.14_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.14_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.14_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.14_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.14_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.14_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.14_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.14_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.14_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.14_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.14_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.14_norm1_context_Constant_10_attr__value: 
"0x080000000100000000000000", __transformer_blocks.14_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.14_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.14_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.14_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_11676_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_11680_attr__value: "0x080000000000000000000000", _Constant_11683_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_11687_attr__value: "0x080000000000000000000000", _Constant_11690_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_11694_attr__value: "0x080000000000000000000000", _Constant_11697_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_11701_attr__value: "0x080000000000000000000000", _Constant_11704_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_11708_attr__value: "0x080000000000000000000000", _Constant_11711_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_13_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_11715_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.14_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.14_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.14_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.14_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.14_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.14_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.14_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.14_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.14_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.14_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.14_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.14_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.14_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_11800_attr__value: "0x080000000000000000000000", _Constant_11802_attr__value: "0x080000000000000000000000", _Constant_11804_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.14_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.14_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.14_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.14_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.14_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_11845_attr__value: "0x080000000000000000000000", _Constant_11847_attr__value: "0x080000000000000000000000", _Constant_11849_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.14_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.14_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.14_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.14_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.14_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.14_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.14_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_11902_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_11905_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.14_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.14_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.14_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.14_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.14_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.14_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.14_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.14_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.14_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.14_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.14_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.14_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.14_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.14_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.14_Constant_4_attr__value: "0x080000000100000000000000", 
__transformer_blocks.14_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.14_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.14_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.14_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.14_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.14_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.14_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.14_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.14_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.15_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.15_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.15_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.15_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.15_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.15_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.15_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.15_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.15_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.15_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.15_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.15_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.15_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.15_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.15_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", 
__transformer_blocks.15_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.15_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.15_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.15_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.15_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.15_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.15_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.15_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.15_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.15_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.15_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.15_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.15_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_12093_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_12097_attr__value: "0x080000000000000000000000", _Constant_12100_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_12104_attr__value: "0x080000000000000000000000", _Constant_12107_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.15_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_12111_attr__value: "0x080000000000000000000000", _Constant_12114_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_12118_attr__value: "0x080000000000000000000000", _Constant_12121_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_12125_attr__value: "0x080000000000000000000000", _Constant_12128_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_12132_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.15_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.15_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.15_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.15_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.15_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.15_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.15_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.15_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.15_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.15_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.15_attn_norm_added_k_Constant_2_attr__value: 
"0x080000000000803F", __transformer_blocks.15_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.15_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_12217_attr__value: "0x080000000000000000000000", _Constant_12219_attr__value: "0x080000000000000000000000", _Constant_12221_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.15_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.15_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.15_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.15_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_12262_attr__value: "0x080000000000000000000000", _Constant_12264_attr__value: "0x080000000000000000000000", _Constant_12266_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_33_attr__value: "0x080000000200000000000000", 
__transformer_blocks.15_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.15_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.15_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.15_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.15_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.15_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_12319_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_12322_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.15_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.15_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.15_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.15_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.15_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", 
__transformer_blocks.15_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.15_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.15_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.15_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.15_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.15_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.15_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.15_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.15_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.15_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.15_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.15_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.15_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.15_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.15_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.15_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.15_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.15_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.15_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.16_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.16_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.16_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.16_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.16_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.16_norm1_Constant_5_attr__value: "0x080000000200000000000000", 
__transformer_blocks.16_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.16_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.16_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.16_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.16_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.16_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.16_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.16_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.16_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.16_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.16_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.16_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.16_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.16_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.16_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.16_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.16_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.16_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.16_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.16_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.16_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.16_attn_Constant_2_attr__value: 
"0x080000001800000000000000", _Constant_12510_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_12514_attr__value: "0x080000000000000000000000", _Constant_12517_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_12521_attr__value: "0x080000000000000000000000", _Constant_12524_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_12528_attr__value: "0x080000000000000000000000", _Constant_12531_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_12535_attr__value: "0x080000000000000000000000", _Constant_12538_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_12542_attr__value: "0x080000000000000000000000", _Constant_12545_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_12549_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.16_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.16_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", 
__transformer_blocks.16_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.16_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.16_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.16_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.16_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.16_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.16_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.16_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.16_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.16_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.16_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_12634_attr__value: "0x080000000000000000000000", _Constant_12636_attr__value: "0x080000000000000000000000", _Constant_12638_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.16_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.16_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_26_attr__value: "0x080000000000000000000000", 
__transformer_blocks.16_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.16_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.16_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_12679_attr__value: "0x080000000000000000000000", _Constant_12681_attr__value: "0x080000000000000000000000", _Constant_12683_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.16_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.16_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.16_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.16_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.16_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.16_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_12736_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_47_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", _Constant_12739_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.16_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.16_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.16_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.16_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.16_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.16_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.16_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.16_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.16_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.16_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.16_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.16_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.16_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.16_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.16_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.16_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.16_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.16_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.16_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.16_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.16_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", 
__transformer_blocks.16_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.16_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.16_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.17_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.17_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.17_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.17_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.17_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.17_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.17_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.17_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.17_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.17_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.17_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.17_norm1_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.17_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.17_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.17_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.17_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.17_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.17_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.17_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.17_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", 
__transformer_blocks.17_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.17_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.17_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.17_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.17_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.17_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.17_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.17_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_12927_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_12931_attr__value: "0x080000000000000000000000", _Constant_12934_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_12938_attr__value: "0x080000000000000000000000", _Constant_12941_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_12945_attr__value: "0x080000000000000000000000", _Constant_12948_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_12952_attr__value: "0x080000000000000000000000", _Constant_12955_attr__value: "0x080000000000000000000000", 
__transformer_blocks.17_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_12959_attr__value: "0x080000000000000000000000", _Constant_12962_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_12966_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.17_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.17_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.17_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.17_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.17_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.17_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.17_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.17_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.17_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.17_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.17_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.17_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.17_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_13051_attr__value: "0x080000000000000000000000", _Constant_13053_attr__value: "0x080000000000000000000000", _Constant_13055_attr__value: "0x080000000000000000000000", 
__transformer_blocks.17_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.17_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.17_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.17_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.17_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_13096_attr__value: "0x080000000000000000000000", _Constant_13098_attr__value: "0x080000000000000000000000", _Constant_13100_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.17_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.17_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.17_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.17_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.17_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.17_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.17_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_13153_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_13156_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.17_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.17_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_51_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.17_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.17_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.17_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.17_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.17_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.17_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.17_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.17_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.17_ff_net.0_Constant_attr__value: "0x08000000373D", 
__transformer_blocks.17_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.17_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.17_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.17_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.17_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.17_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.17_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.17_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.17_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.17_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.17_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.17_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.17_Constant_9_attr__value: "0x080000000100000000000000", __transformer_blocks.18_norm1_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.18_norm1_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.18_norm1_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.18_norm1_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.18_norm1_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.18_norm1_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.18_norm1_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.18_norm1_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.18_norm1_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.18_norm1_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.18_norm1_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.18_norm1_Constant_11_attr__value: 
"0x08000000803F", __transformer_blocks.18_norm1_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.18_norm1_context_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.18_norm1_context_Constant_1_attr__value: "0x080000000000000000000000", __transformer_blocks.18_norm1_context_Constant_2_attr__value: "0x080000000500000000000000", __transformer_blocks.18_norm1_context_Constant_3_attr__value: "0x080000000600000000000000", __transformer_blocks.18_norm1_context_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.18_norm1_context_Constant_5_attr__value: "0x080000000200000000000000", __transformer_blocks.18_norm1_context_Constant_6_attr__value: "0x080000000300000000000000", __transformer_blocks.18_norm1_context_Constant_7_attr__value: "0x080000000400000000000000", __transformer_blocks.18_norm1_context_Constant_8_attr__value: "0x080000000500000000000000", __transformer_blocks.18_norm1_context_Constant_9_attr__value: "0x080000000600000000000000", __transformer_blocks.18_norm1_context_Constant_10_attr__value: "0x080000000100000000000000", __transformer_blocks.18_norm1_context_Constant_11_attr__value: "0x08000000803F", __transformer_blocks.18_norm1_context_Constant_12_attr__value: "0x080000000100000000000000", __transformer_blocks.18_attn_Constant_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_1_attr__value: "0x080000000200000000000000", __transformer_blocks.18_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_13344_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_13348_attr__value: "0x080000000000000000000000", _Constant_13351_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__transformer_blocks.18_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_13355_attr__value: "0x080000000000000000000000", _Constant_13358_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_13362_attr__value: "0x080000000000000000000000", _Constant_13365_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_9_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_10_attr__value: "0x080000001800000000000000", _Constant_13369_attr__value: "0x080000000000000000000000", _Constant_13372_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_11_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_12_attr__value: "0x080000001800000000000000", _Constant_13376_attr__value: "0x080000000000000000000000", _Constant_13379_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_13_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_14_attr__value: "0x080000001800000000000000", _Constant_13383_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_norm_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.18_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.18_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.18_attn_norm_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.18_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.18_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.18_attn_norm_added_q_Constant_attr__value: "0x0800000000000040", __transformer_blocks.18_attn_norm_added_q_Constant_1_attr__value: "0x08000000BD378635", 
__transformer_blocks.18_attn_norm_added_q_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.18_attn_norm_added_k_Constant_attr__value: "0x0800000000000040", __transformer_blocks.18_attn_norm_added_k_Constant_1_attr__value: "0x08000000BD378635", __transformer_blocks.18_attn_norm_added_k_Constant_2_attr__value: "0x080000000000803F", __transformer_blocks.18_attn_Constant_15_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_16_attr__value: "0x080000000100000000000000", __transformer_blocks.18_attn_Constant_17_attr__value: "0x080000000200000000000000", _Constant_13468_attr__value: "0x080000000000000000000000", _Constant_13470_attr__value: "0x080000000000000000000000", _Constant_13472_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_19_attr__value: "0x080000000200000000000000", __transformer_blocks.18_attn_Constant_20_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.18_attn_Constant_21_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_23_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_24_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_25_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_26_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_27_attr__value: "0x080000000300000000000000", __transformer_blocks.18_attn_Constant_28_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_29_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_30_attr__value: "0x080000000100000000000000", __transformer_blocks.18_attn_Constant_31_attr__value: "0x080000000200000000000000", _Constant_13513_attr__value: 
"0x080000000000000000000000", _Constant_13515_attr__value: "0x080000000000000000000000", _Constant_13517_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_33_attr__value: "0x080000000200000000000000", __transformer_blocks.18_attn_Constant_34_attr__value: "0x0800000001000000000000000100000000000000", __transformer_blocks.18_attn_Constant_35_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_39_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_40_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_41_attr__value: "0x080000000300000000000000", __transformer_blocks.18_attn_Constant_42_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_43_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __transformer_blocks.18_attn_Constant_44_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.18_attn_Constant_45_attr__value: "0x080000000000803F", __transformer_blocks.18_attn_Constant_46_attr__value: "0x080000001800000000000000", _Constant_13570_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_47_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_13573_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_48_attr__value: "0x080000000100000000000000", __transformer_blocks.18_attn_Constant_49_attr__value: "0x080000000100000000000000", __transformer_blocks.18_attn_Constant_50_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_51_attr__value: "0x080000000000000000000000", 
__transformer_blocks.18_attn_Constant_52_attr__value: "0x080000000100000000000000", __transformer_blocks.18_attn_Constant_53_attr__value: "0x080000000100000000000000", __transformer_blocks.18_attn_Constant_54_attr__value: "0x080000000000000000000000", __transformer_blocks.18_attn_Constant_55_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __transformer_blocks.18_attn_Constant_56_attr__value: "0x080000000100000000000000", __transformer_blocks.18_Constant_attr__value: "0x080000000100000000000000", __transformer_blocks.18_Constant_1_attr__value: "0x080000000100000000000000", __transformer_blocks.18_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.18_Constant_3_attr__value: "0x080000000100000000000000", __transformer_blocks.18_ff_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.18_ff_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.18_ff_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.18_ff_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.18_Constant_4_attr__value: "0x080000000100000000000000", __transformer_blocks.18_Constant_5_attr__value: "0x080000000100000000000000", __transformer_blocks.18_Constant_6_attr__value: "0x080000000100000000000000", __transformer_blocks.18_Constant_7_attr__value: "0x08000000803F", __transformer_blocks.18_Constant_8_attr__value: "0x080000000100000000000000", __transformer_blocks.18_ff_context_net.0_Constant_attr__value: "0x08000000373D", __transformer_blocks.18_ff_context_net.0_Constant_1_attr__value: "0x080000004C3F", __transformer_blocks.18_ff_context_net.0_Constant_2_attr__value: "0x08000000803F", __transformer_blocks.18_ff_context_net.0_Constant_3_attr__value: "0x08000000003F", __transformer_blocks.18_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.0_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.0_norm_Constant_1_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.0_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.0_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.0_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.0_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.0_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.0_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.0_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.0_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.0_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.0_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.0_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.0_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.0_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.0_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_13730_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_13734_attr__value: "0x080000000000000000000000", _Constant_13737_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_13741_attr__value: "0x080000000000000000000000", _Constant_13744_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_7_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_13748_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.0_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.0_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.0_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.0_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.0_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.0_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.0_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_13792_attr__value: "0x080000000000000000000000", _Constant_13794_attr__value: "0x080000000000000000000000", _Constant_13796_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.0_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.0_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_20_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.0_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.0_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.0_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_13837_attr__value: "0x080000000000000000000000", _Constant_13839_attr__value: "0x080000000000000000000000", _Constant_13841_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.0_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.0_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.0_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.0_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.0_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.0_attn_Constant_40_attr__value: "0x080000001800000000000000", 
_Constant_13894_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_13897_attr__value: "0x080000000000000000000000", __single_transformer_blocks.0_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.1_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.1_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.1_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.1_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.1_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.1_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.1_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.1_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.1_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.1_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.1_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.1_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.1_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.1_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.1_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_13969_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_4_attr__value: "0x080000001800000000000000", 
_Constant_13973_attr__value: "0x080000000000000000000000", _Constant_13976_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_13980_attr__value: "0x080000000000000000000000", _Constant_13983_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_13987_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.1_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.1_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.1_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.1_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.1_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.1_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.1_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_14031_attr__value: "0x080000000000000000000000", _Constant_14033_attr__value: "0x080000000000000000000000", _Constant_14035_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.1_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.1_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.1_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.1_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.1_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_14076_attr__value: "0x080000000000000000000000", _Constant_14078_attr__value: "0x080000000000000000000000", _Constant_14080_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.1_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.1_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_35_attr__value: "0x080000000300000000000000", 
__single_transformer_blocks.1_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.1_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.1_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.1_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_14133_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_14136_attr__value: "0x080000000000000000000000", __single_transformer_blocks.1_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.2_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.2_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.2_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.2_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.2_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.2_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.2_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.2_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.2_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.2_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.2_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.2_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.2_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.2_attn_Constant_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.2_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_14208_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_14212_attr__value: "0x080000000000000000000000", _Constant_14215_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_14219_attr__value: "0x080000000000000000000000", _Constant_14222_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_14226_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.2_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.2_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.2_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.2_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.2_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.2_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.2_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_14270_attr__value: "0x080000000000000000000000", _Constant_14272_attr__value: 
"0x080000000000000000000000", _Constant_14274_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.2_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.2_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.2_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.2_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_14315_attr__value: "0x080000000000000000000000", _Constant_14317_attr__value: "0x080000000000000000000000", _Constant_14319_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.2_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.2_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_30_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.2_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.2_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.2_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.2_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_14372_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_14375_attr__value: "0x080000000000000000000000", __single_transformer_blocks.2_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.3_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.3_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.3_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.3_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.3_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.3_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.3_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.3_norm_Constant_8_attr__value: "0x08000000803F", 
__single_transformer_blocks.3_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.3_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.3_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.3_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.3_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.3_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.3_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_14447_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_14451_attr__value: "0x080000000000000000000000", _Constant_14454_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_14458_attr__value: "0x080000000000000000000000", _Constant_14461_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_14465_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.3_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.3_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.3_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.3_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", 
__single_transformer_blocks.3_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.3_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.3_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_14509_attr__value: "0x080000000000000000000000", _Constant_14511_attr__value: "0x080000000000000000000000", _Constant_14513_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.3_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.3_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.3_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.3_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_14554_attr__value: "0x080000000000000000000000", _Constant_14556_attr__value: "0x080000000000000000000000", _Constant_14558_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.3_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.3_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.3_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.3_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.3_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.3_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.3_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_14611_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_14614_attr__value: "0x080000000000000000000000", __single_transformer_blocks.3_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.4_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.4_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.4_norm_Constant_3_attr__value: 
"0x080000000300000000000000", __single_transformer_blocks.4_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.4_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.4_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.4_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.4_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.4_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.4_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.4_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.4_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.4_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.4_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.4_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_14686_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_14690_attr__value: "0x080000000000000000000000", _Constant_14693_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_14697_attr__value: "0x080000000000000000000000", _Constant_14700_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_14704_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.4_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.4_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.4_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.4_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.4_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.4_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.4_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.4_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_14748_attr__value: "0x080000000000000000000000", _Constant_14750_attr__value: "0x080000000000000000000000", _Constant_14752_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.4_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.4_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.4_attn_Constant_22_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.4_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_14793_attr__value: "0x080000000000000000000000", _Constant_14795_attr__value: "0x080000000000000000000000", _Constant_14797_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.4_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.4_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.4_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.4_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.4_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.4_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_14850_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
_Constant_14853_attr__value: "0x080000000000000000000000", __single_transformer_blocks.4_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.5_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.5_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.5_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.5_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.5_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.5_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.5_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.5_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.5_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.5_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.5_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.5_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.5_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.5_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.5_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_14925_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_14929_attr__value: "0x080000000000000000000000", _Constant_14932_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.5_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_14936_attr__value: "0x080000000000000000000000", _Constant_14939_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_14943_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.5_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.5_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.5_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.5_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.5_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.5_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.5_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_14987_attr__value: "0x080000000000000000000000", _Constant_14989_attr__value: "0x080000000000000000000000", _Constant_14991_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.5_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.5_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.5_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.5_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.5_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_15032_attr__value: "0x080000000000000000000000", _Constant_15034_attr__value: "0x080000000000000000000000", _Constant_15036_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.5_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.5_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.5_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.5_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.5_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.5_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.5_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_15089_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_15092_attr__value: "0x080000000000000000000000", __single_transformer_blocks.5_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.6_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.6_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.6_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.6_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.6_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.6_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.6_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.6_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.6_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.6_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.6_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.6_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.6_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.6_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_1_attr__value: 
"0x080000000200000000000000", __single_transformer_blocks.6_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_15164_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_15168_attr__value: "0x080000000000000000000000", _Constant_15171_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_15175_attr__value: "0x080000000000000000000000", _Constant_15178_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_15182_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.6_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.6_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.6_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.6_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.6_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.6_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.6_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_15226_attr__value: "0x080000000000000000000000", _Constant_15228_attr__value: "0x080000000000000000000000", _Constant_15230_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.6_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.6_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.6_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.6_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.6_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_15271_attr__value: "0x080000000000000000000000", _Constant_15273_attr__value: "0x080000000000000000000000", _Constant_15275_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.6_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.6_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_31_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.6_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.6_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.6_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.6_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_15328_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_15331_attr__value: "0x080000000000000000000000", __single_transformer_blocks.6_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.7_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.7_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.7_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.7_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.7_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.7_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.7_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.7_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.7_norm_Constant_9_attr__value: "0x080000000100000000000000", 
__single_transformer_blocks.7_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.7_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.7_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.7_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.7_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.7_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_15403_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_15407_attr__value: "0x080000000000000000000000", _Constant_15410_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_15414_attr__value: "0x080000000000000000000000", _Constant_15417_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_15421_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.7_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.7_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.7_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.7_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.7_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", 
__single_transformer_blocks.7_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.7_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_15465_attr__value: "0x080000000000000000000000", _Constant_15467_attr__value: "0x080000000000000000000000", _Constant_15469_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.7_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.7_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.7_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.7_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_15510_attr__value: "0x080000000000000000000000", _Constant_15512_attr__value: "0x080000000000000000000000", _Constant_15514_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.7_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.7_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.7_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.7_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.7_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.7_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.7_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_15567_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_15570_attr__value: "0x080000000000000000000000", __single_transformer_blocks.7_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.8_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.8_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.8_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.8_norm_Constant_4_attr__value: 
"0x080000000100000000000000", __single_transformer_blocks.8_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.8_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.8_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.8_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.8_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.8_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.8_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.8_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.8_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.8_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.8_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_15642_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_15646_attr__value: "0x080000000000000000000000", _Constant_15649_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_15653_attr__value: "0x080000000000000000000000", _Constant_15656_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_15660_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_norm_q_Constant_attr__value: 
"0x0800000000000040", __single_transformer_blocks.8_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.8_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.8_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.8_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.8_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.8_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.8_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_15704_attr__value: "0x080000000000000000000000", _Constant_15706_attr__value: "0x080000000000000000000000", _Constant_15708_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.8_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.8_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.8_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_23_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.8_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_15749_attr__value: "0x080000000000000000000000", _Constant_15751_attr__value: "0x080000000000000000000000", _Constant_15753_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.8_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.8_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.8_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.8_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.8_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.8_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_15806_attr__value: "0x080000000000000000000000", __single_transformer_blocks.8_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_15809_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.8_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.9_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.9_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.9_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.9_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.9_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.9_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.9_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.9_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.9_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.9_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.9_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.9_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.9_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.9_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.9_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_15881_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_15885_attr__value: "0x080000000000000000000000", _Constant_15888_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.9_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_15892_attr__value: "0x080000000000000000000000", _Constant_15895_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_15899_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.9_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.9_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.9_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.9_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.9_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.9_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.9_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_15943_attr__value: "0x080000000000000000000000", _Constant_15945_attr__value: "0x080000000000000000000000", _Constant_15947_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.9_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.9_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.9_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.9_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.9_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_15988_attr__value: "0x080000000000000000000000", _Constant_15990_attr__value: "0x080000000000000000000000", _Constant_15992_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.9_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.9_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.9_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.9_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.9_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.9_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.9_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_16045_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_16048_attr__value: "0x080000000000000000000000", __single_transformer_blocks.9_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.10_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.10_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.10_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.10_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.10_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.10_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.10_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.10_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.10_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.10_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.10_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.10_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.10_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.10_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.10_attn_Constant_2_attr__value: 
"0x080000001800000000000000", _Constant_16120_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_16124_attr__value: "0x080000000000000000000000", _Constant_16127_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_16131_attr__value: "0x080000000000000000000000", _Constant_16134_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_16138_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.10_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.10_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.10_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.10_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.10_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.10_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.10_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_16182_attr__value: "0x080000000000000000000000", _Constant_16184_attr__value: "0x080000000000000000000000", _Constant_16186_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.10_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.10_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.10_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.10_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.10_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_16227_attr__value: "0x080000000000000000000000", _Constant_16229_attr__value: "0x080000000000000000000000", _Constant_16231_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.10_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.10_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_32_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.10_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.10_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.10_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.10_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_16284_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_16287_attr__value: "0x080000000000000000000000", __single_transformer_blocks.10_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.11_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.11_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.11_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.11_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.11_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.11_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.11_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.11_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.11_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.11_act_mlp_Constant_attr__value: 
"0x08000000373D", __single_transformer_blocks.11_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.11_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.11_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.11_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.11_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_16359_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_16363_attr__value: "0x080000000000000000000000", _Constant_16366_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_16370_attr__value: "0x080000000000000000000000", _Constant_16373_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_16377_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.11_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.11_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.11_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.11_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.11_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", 
__single_transformer_blocks.11_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.11_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_16421_attr__value: "0x080000000000000000000000", _Constant_16423_attr__value: "0x080000000000000000000000", _Constant_16425_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.11_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.11_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.11_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.11_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_16466_attr__value: "0x080000000000000000000000", _Constant_16468_attr__value: "0x080000000000000000000000", _Constant_16470_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_26_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.11_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.11_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.11_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.11_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.11_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.11_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_16523_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_16526_attr__value: "0x080000000000000000000000", __single_transformer_blocks.11_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.12_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.12_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.12_norm_Constant_3_attr__value: "0x080000000300000000000000", 
__single_transformer_blocks.12_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.12_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.12_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.12_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.12_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.12_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.12_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.12_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.12_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.12_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.12_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.12_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_16598_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_16602_attr__value: "0x080000000000000000000000", _Constant_16605_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_16609_attr__value: "0x080000000000000000000000", _Constant_16612_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_16616_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.12_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.12_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.12_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.12_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.12_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.12_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.12_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.12_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_16660_attr__value: "0x080000000000000000000000", _Constant_16662_attr__value: "0x080000000000000000000000", _Constant_16664_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.12_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.12_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.12_attn_Constant_22_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.12_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_16705_attr__value: "0x080000000000000000000000", _Constant_16707_attr__value: "0x080000000000000000000000", _Constant_16709_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.12_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.12_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.12_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.12_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.12_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.12_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_16762_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_attn_Constant_41_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", _Constant_16765_attr__value: "0x080000000000000000000000", __single_transformer_blocks.12_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.13_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.13_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.13_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.13_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.13_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.13_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.13_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.13_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.13_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.13_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.13_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.13_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.13_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.13_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.13_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_16837_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_16841_attr__value: "0x080000000000000000000000", _Constant_16844_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_16848_attr__value: "0x080000000000000000000000", _Constant_16851_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_16855_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.13_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.13_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.13_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.13_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.13_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.13_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.13_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_16899_attr__value: "0x080000000000000000000000", _Constant_16901_attr__value: "0x080000000000000000000000", _Constant_16903_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.13_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.13_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_16_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.13_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.13_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_16944_attr__value: "0x080000000000000000000000", _Constant_16946_attr__value: "0x080000000000000000000000", _Constant_16948_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.13_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.13_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_35_attr__value: "0x080000000300000000000000", 
__single_transformer_blocks.13_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.13_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.13_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.13_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_17001_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_17004_attr__value: "0x080000000000000000000000", __single_transformer_blocks.13_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.14_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.14_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.14_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.14_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.14_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.14_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.14_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.14_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.14_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.14_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.14_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.14_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.14_act_mlp_Constant_3_attr__value: "0x08000000003F", 
__single_transformer_blocks.14_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.14_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_17076_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_17080_attr__value: "0x080000000000000000000000", _Constant_17083_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_17087_attr__value: "0x080000000000000000000000", _Constant_17090_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_17094_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.14_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.14_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.14_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.14_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.14_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.14_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.14_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_17138_attr__value: 
"0x080000000000000000000000", _Constant_17140_attr__value: "0x080000000000000000000000", _Constant_17142_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.14_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.14_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.14_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.14_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_17183_attr__value: "0x080000000000000000000000", _Constant_17185_attr__value: "0x080000000000000000000000", _Constant_17187_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.14_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.14_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.14_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.14_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.14_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.14_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.14_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_17240_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_17243_attr__value: "0x080000000000000000000000", __single_transformer_blocks.14_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.15_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.15_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.15_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.15_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.15_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.15_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.15_norm_Constant_7_attr__value: 
"0x080000000100000000000000", __single_transformer_blocks.15_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.15_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.15_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.15_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.15_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.15_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.15_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.15_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_17315_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_17319_attr__value: "0x080000000000000000000000", _Constant_17322_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_17326_attr__value: "0x080000000000000000000000", _Constant_17329_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_17333_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.15_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.15_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", 
__single_transformer_blocks.15_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.15_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.15_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.15_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.15_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_17377_attr__value: "0x080000000000000000000000", _Constant_17379_attr__value: "0x080000000000000000000000", _Constant_17381_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.15_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.15_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.15_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.15_attn_Constant_25_attr__value: 
"0x080000000200000000000000", _Constant_17422_attr__value: "0x080000000000000000000000", _Constant_17424_attr__value: "0x080000000000000000000000", _Constant_17426_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.15_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.15_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.15_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.15_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.15_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.15_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_17479_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_17482_attr__value: "0x080000000000000000000000", __single_transformer_blocks.15_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.16_norm_Constant_attr__value: "0x080000000100000000000000", 
__single_transformer_blocks.16_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.16_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.16_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.16_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.16_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.16_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.16_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.16_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.16_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.16_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.16_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.16_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.16_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.16_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_17554_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_17558_attr__value: "0x080000000000000000000000", _Constant_17561_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_17565_attr__value: "0x080000000000000000000000", 
_Constant_17568_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_17572_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.16_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.16_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.16_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.16_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.16_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.16_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.16_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_17616_attr__value: "0x080000000000000000000000", _Constant_17618_attr__value: "0x080000000000000000000000", _Constant_17620_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.16_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.16_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_19_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.16_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.16_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_17661_attr__value: "0x080000000000000000000000", _Constant_17663_attr__value: "0x080000000000000000000000", _Constant_17665_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.16_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.16_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.16_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.16_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", 
__single_transformer_blocks.16_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.16_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_17718_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_17721_attr__value: "0x080000000000000000000000", __single_transformer_blocks.16_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.17_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.17_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.17_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.17_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.17_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.17_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.17_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.17_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.17_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.17_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.17_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.17_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.17_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.17_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.17_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_17793_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_17797_attr__value: "0x080000000000000000000000", _Constant_17800_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_17804_attr__value: "0x080000000000000000000000", _Constant_17807_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_17811_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.17_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.17_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.17_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.17_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.17_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.17_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.17_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_17855_attr__value: "0x080000000000000000000000", _Constant_17857_attr__value: "0x080000000000000000000000", _Constant_17859_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_13_attr__value: 
"0x080000000200000000000000", __single_transformer_blocks.17_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.17_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.17_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.17_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_17900_attr__value: "0x080000000000000000000000", _Constant_17902_attr__value: "0x080000000000000000000000", _Constant_17904_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.17_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.17_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.17_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.17_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.17_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.17_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.17_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_17957_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_17960_attr__value: "0x080000000000000000000000", __single_transformer_blocks.17_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.18_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.18_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.18_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.18_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.18_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.18_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.18_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.18_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.18_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.18_act_mlp_Constant_attr__value: "0x08000000373D", 
__single_transformer_blocks.18_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.18_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.18_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.18_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.18_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_18032_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_18036_attr__value: "0x080000000000000000000000", _Constant_18039_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_18043_attr__value: "0x080000000000000000000000", _Constant_18046_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_18050_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.18_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.18_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.18_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.18_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.18_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.18_attn_Constant_9_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.18_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_18094_attr__value: "0x080000000000000000000000", _Constant_18096_attr__value: "0x080000000000000000000000", _Constant_18098_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.18_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.18_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.18_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.18_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_18139_attr__value: "0x080000000000000000000000", _Constant_18141_attr__value: "0x080000000000000000000000", _Constant_18143_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.18_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.18_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.18_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.18_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.18_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.18_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.18_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_18196_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_18199_attr__value: "0x080000000000000000000000", __single_transformer_blocks.18_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.19_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.19_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.19_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.19_norm_Constant_4_attr__value: 
"0x080000000100000000000000", __single_transformer_blocks.19_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.19_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.19_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.19_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.19_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.19_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.19_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.19_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.19_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.19_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.19_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_18271_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_18275_attr__value: "0x080000000000000000000000", _Constant_18278_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_18282_attr__value: "0x080000000000000000000000", _Constant_18285_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_18289_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.19_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.19_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.19_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.19_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.19_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.19_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.19_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.19_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_18333_attr__value: "0x080000000000000000000000", _Constant_18335_attr__value: "0x080000000000000000000000", _Constant_18337_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.19_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.19_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.19_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.19_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.19_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_18378_attr__value: "0x080000000000000000000000", _Constant_18380_attr__value: "0x080000000000000000000000", _Constant_18382_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.19_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.19_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.19_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.19_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.19_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.19_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_18435_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
_Constant_18438_attr__value: "0x080000000000000000000000", __single_transformer_blocks.19_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.20_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.20_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.20_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.20_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.20_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.20_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.20_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.20_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.20_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.20_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.20_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.20_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.20_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.20_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.20_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_18510_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_18514_attr__value: "0x080000000000000000000000", _Constant_18517_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.20_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_18521_attr__value: "0x080000000000000000000000", _Constant_18524_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_18528_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.20_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.20_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.20_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.20_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.20_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.20_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.20_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_18572_attr__value: "0x080000000000000000000000", _Constant_18574_attr__value: "0x080000000000000000000000", _Constant_18576_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.20_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.20_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.20_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.20_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.20_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_18617_attr__value: "0x080000000000000000000000", _Constant_18619_attr__value: "0x080000000000000000000000", _Constant_18621_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.20_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.20_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.20_attn_Constant_36_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.20_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.20_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.20_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_18674_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_18677_attr__value: "0x080000000000000000000000", __single_transformer_blocks.20_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.21_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.21_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.21_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.21_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.21_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.21_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.21_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.21_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.21_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.21_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.21_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.21_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.21_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.21_attn_Constant_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.21_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.21_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_18749_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_18753_attr__value: "0x080000000000000000000000", _Constant_18756_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_18760_attr__value: "0x080000000000000000000000", _Constant_18763_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_18767_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.21_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.21_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.21_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.21_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.21_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.21_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.21_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_18811_attr__value: "0x080000000000000000000000", _Constant_18813_attr__value: "0x080000000000000000000000", 
_Constant_18815_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.21_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.21_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.21_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.21_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_18856_attr__value: "0x080000000000000000000000", _Constant_18858_attr__value: "0x080000000000000000000000", _Constant_18860_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.21_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.21_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.21_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.21_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.21_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.21_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.21_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_18913_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_18916_attr__value: "0x080000000000000000000000", __single_transformer_blocks.21_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.22_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.22_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.22_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.22_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.22_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.22_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.22_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.22_norm_Constant_8_attr__value: "0x08000000803F", 
__single_transformer_blocks.22_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.22_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.22_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.22_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.22_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.22_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.22_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_18988_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_18992_attr__value: "0x080000000000000000000000", _Constant_18995_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_18999_attr__value: "0x080000000000000000000000", _Constant_19002_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_19006_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.22_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.22_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.22_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.22_attn_norm_k_Constant_1_attr__value: 
"0x08000000BD378635", __single_transformer_blocks.22_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.22_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.22_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_19050_attr__value: "0x080000000000000000000000", _Constant_19052_attr__value: "0x080000000000000000000000", _Constant_19054_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.22_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.22_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.22_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.22_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_19095_attr__value: "0x080000000000000000000000", _Constant_19097_attr__value: "0x080000000000000000000000", _Constant_19099_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.22_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.22_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.22_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.22_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.22_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.22_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_19152_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_19155_attr__value: "0x080000000000000000000000", __single_transformer_blocks.22_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.23_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.23_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_norm_Constant_2_attr__value: "0x080000000200000000000000", 
__single_transformer_blocks.23_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.23_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.23_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.23_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.23_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.23_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.23_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.23_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.23_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.23_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.23_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.23_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.23_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_19227_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_19231_attr__value: "0x080000000000000000000000", _Constant_19234_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_19238_attr__value: "0x080000000000000000000000", _Constant_19241_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.23_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_19245_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.23_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.23_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.23_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.23_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.23_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.23_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.23_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_19289_attr__value: "0x080000000000000000000000", _Constant_19291_attr__value: "0x080000000000000000000000", _Constant_19293_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.23_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.23_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_20_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.23_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.23_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.23_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_19334_attr__value: "0x080000000000000000000000", _Constant_19336_attr__value: "0x080000000000000000000000", _Constant_19338_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.23_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.23_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.23_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.23_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.23_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.23_attn_Constant_40_attr__value: 
"0x080000001800000000000000", _Constant_19391_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_19394_attr__value: "0x080000000000000000000000", __single_transformer_blocks.23_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.24_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.24_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.24_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.24_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.24_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.24_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.24_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.24_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.24_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.24_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.24_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.24_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.24_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.24_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.24_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_19466_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.24_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_19470_attr__value: "0x080000000000000000000000", _Constant_19473_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_19477_attr__value: "0x080000000000000000000000", _Constant_19480_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_19484_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.24_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.24_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.24_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.24_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.24_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.24_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.24_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_19528_attr__value: "0x080000000000000000000000", _Constant_19530_attr__value: "0x080000000000000000000000", _Constant_19532_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.24_attn_Constant_14_attr__value: 
"0x0800000001000000000000000100000000000000", __single_transformer_blocks.24_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.24_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.24_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_19573_attr__value: "0x080000000000000000000000", _Constant_19575_attr__value: "0x080000000000000000000000", _Constant_19577_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.24_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.24_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_33_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.24_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.24_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.24_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.24_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.24_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_19630_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_19633_attr__value: "0x080000000000000000000000", __single_transformer_blocks.24_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.25_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.25_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.25_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.25_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.25_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.25_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.25_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.25_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.25_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.25_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.25_act_mlp_Constant_1_attr__value: "0x080000004C3F", 
__single_transformer_blocks.25_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.25_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.25_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.25_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_19705_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_19709_attr__value: "0x080000000000000000000000", _Constant_19712_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_19716_attr__value: "0x080000000000000000000000", _Constant_19719_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_19723_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.25_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.25_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.25_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.25_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.25_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.25_attn_Constant_9_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.25_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.25_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_19767_attr__value: "0x080000000000000000000000", _Constant_19769_attr__value: "0x080000000000000000000000", _Constant_19771_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.25_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.25_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.25_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.25_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_19812_attr__value: "0x080000000000000000000000", _Constant_19814_attr__value: "0x080000000000000000000000", _Constant_19816_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_27_attr__value: 
"0x080000000200000000000000", __single_transformer_blocks.25_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.25_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.25_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.25_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.25_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.25_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_19869_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_19872_attr__value: "0x080000000000000000000000", __single_transformer_blocks.25_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.26_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.26_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.26_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.26_norm_Constant_4_attr__value: "0x080000000100000000000000", 
__single_transformer_blocks.26_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.26_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.26_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.26_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.26_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.26_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.26_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.26_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.26_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.26_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.26_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_19944_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_19948_attr__value: "0x080000000000000000000000", _Constant_19951_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_19955_attr__value: "0x080000000000000000000000", _Constant_19958_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_19962_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_norm_q_Constant_attr__value: 
"0x0800000000000040", __single_transformer_blocks.26_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.26_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.26_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.26_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.26_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.26_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.26_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_20006_attr__value: "0x080000000000000000000000", _Constant_20008_attr__value: "0x080000000000000000000000", _Constant_20010_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.26_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.26_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.26_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_23_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.26_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_20051_attr__value: "0x080000000000000000000000", _Constant_20053_attr__value: "0x080000000000000000000000", _Constant_20055_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.26_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.26_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.26_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.26_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.26_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.26_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_20108_attr__value: "0x080000000000000000000000", __single_transformer_blocks.26_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_20111_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.26_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.27_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.27_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.27_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.27_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.27_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.27_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.27_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.27_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.27_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.27_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.27_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.27_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.27_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.27_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.27_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_20183_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_20187_attr__value: "0x080000000000000000000000", _Constant_20190_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_5_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_20194_attr__value: "0x080000000000000000000000", _Constant_20197_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_20201_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.27_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.27_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.27_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.27_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.27_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.27_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.27_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_20245_attr__value: "0x080000000000000000000000", _Constant_20247_attr__value: "0x080000000000000000000000", _Constant_20249_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.27_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.27_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_17_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.27_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.27_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_20290_attr__value: "0x080000000000000000000000", _Constant_20292_attr__value: "0x080000000000000000000000", _Constant_20294_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.27_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.27_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.27_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.27_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.27_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.27_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.27_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_20347_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_20350_attr__value: "0x080000000000000000000000", __single_transformer_blocks.27_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.28_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.28_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.28_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.28_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.28_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.28_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.28_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.28_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.28_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.28_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.28_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.28_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.28_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.28_attn_Constant_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.28_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.28_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_20422_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_20426_attr__value: "0x080000000000000000000000", _Constant_20429_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_20433_attr__value: "0x080000000000000000000000", _Constant_20436_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_20440_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.28_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.28_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.28_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.28_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.28_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.28_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.28_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_20484_attr__value: "0x080000000000000000000000", _Constant_20486_attr__value: "0x080000000000000000000000", 
_Constant_20488_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.28_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.28_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.28_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.28_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_20529_attr__value: "0x080000000000000000000000", _Constant_20531_attr__value: "0x080000000000000000000000", _Constant_20533_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.28_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.28_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.28_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.28_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.28_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.28_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.28_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_20586_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_20589_attr__value: "0x080000000000000000000000", __single_transformer_blocks.28_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.29_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.29_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.29_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.29_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.29_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.29_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.29_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.29_norm_Constant_8_attr__value: "0x08000000803F", 
__single_transformer_blocks.29_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.29_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.29_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.29_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.29_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.29_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.29_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_20661_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_20665_attr__value: "0x080000000000000000000000", _Constant_20668_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_20672_attr__value: "0x080000000000000000000000", _Constant_20675_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_20679_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.29_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.29_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.29_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.29_attn_norm_k_Constant_1_attr__value: 
"0x08000000BD378635", __single_transformer_blocks.29_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.29_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.29_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_20723_attr__value: "0x080000000000000000000000", _Constant_20725_attr__value: "0x080000000000000000000000", _Constant_20727_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.29_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.29_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.29_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.29_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_20768_attr__value: "0x080000000000000000000000", _Constant_20770_attr__value: "0x080000000000000000000000", _Constant_20772_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.29_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.29_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.29_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.29_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.29_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.29_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_20825_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_20828_attr__value: "0x080000000000000000000000", __single_transformer_blocks.29_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.30_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.30_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_norm_Constant_2_attr__value: "0x080000000200000000000000", 
__single_transformer_blocks.30_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.30_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.30_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.30_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.30_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.30_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.30_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.30_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.30_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.30_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.30_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.30_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.30_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_20900_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_20904_attr__value: "0x080000000000000000000000", _Constant_20907_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_20911_attr__value: "0x080000000000000000000000", _Constant_20914_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.30_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_20918_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.30_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.30_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.30_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.30_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.30_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.30_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.30_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_20962_attr__value: "0x080000000000000000000000", _Constant_20964_attr__value: "0x080000000000000000000000", _Constant_20966_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.30_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.30_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_20_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.30_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.30_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.30_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_21007_attr__value: "0x080000000000000000000000", _Constant_21009_attr__value: "0x080000000000000000000000", _Constant_21011_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.30_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.30_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.30_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.30_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.30_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.30_attn_Constant_40_attr__value: 
"0x080000001800000000000000", _Constant_21064_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_21067_attr__value: "0x080000000000000000000000", __single_transformer_blocks.30_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.31_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.31_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.31_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.31_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.31_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.31_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.31_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.31_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.31_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.31_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.31_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.31_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.31_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.31_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.31_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_21139_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.31_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_21143_attr__value: "0x080000000000000000000000", _Constant_21146_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_21150_attr__value: "0x080000000000000000000000", _Constant_21153_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_21157_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.31_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.31_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.31_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.31_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.31_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.31_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.31_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_21201_attr__value: "0x080000000000000000000000", _Constant_21203_attr__value: "0x080000000000000000000000", _Constant_21205_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.31_attn_Constant_14_attr__value: 
"0x0800000001000000000000000100000000000000", __single_transformer_blocks.31_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.31_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.31_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_21246_attr__value: "0x080000000000000000000000", _Constant_21248_attr__value: "0x080000000000000000000000", _Constant_21250_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.31_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.31_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_33_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.31_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.31_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.31_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.31_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.31_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_21303_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_21306_attr__value: "0x080000000000000000000000", __single_transformer_blocks.31_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.32_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.32_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.32_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.32_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.32_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.32_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.32_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.32_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.32_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.32_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.32_act_mlp_Constant_1_attr__value: "0x080000004C3F", 
__single_transformer_blocks.32_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.32_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.32_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.32_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_21378_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_21382_attr__value: "0x080000000000000000000000", _Constant_21385_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_21389_attr__value: "0x080000000000000000000000", _Constant_21392_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_21396_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.32_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.32_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.32_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.32_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.32_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.32_attn_Constant_9_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.32_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.32_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_21440_attr__value: "0x080000000000000000000000", _Constant_21442_attr__value: "0x080000000000000000000000", _Constant_21444_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.32_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.32_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.32_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.32_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_21485_attr__value: "0x080000000000000000000000", _Constant_21487_attr__value: "0x080000000000000000000000", _Constant_21489_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_27_attr__value: 
"0x080000000200000000000000", __single_transformer_blocks.32_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.32_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.32_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.32_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.32_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.32_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_21542_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_21545_attr__value: "0x080000000000000000000000", __single_transformer_blocks.32_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.33_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.33_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.33_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.33_norm_Constant_4_attr__value: "0x080000000100000000000000", 
__single_transformer_blocks.33_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.33_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.33_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.33_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.33_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.33_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.33_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.33_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.33_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.33_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.33_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_21617_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_21621_attr__value: "0x080000000000000000000000", _Constant_21624_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_21628_attr__value: "0x080000000000000000000000", _Constant_21631_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_21635_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_norm_q_Constant_attr__value: 
"0x0800000000000040", __single_transformer_blocks.33_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.33_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.33_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.33_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.33_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.33_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.33_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_21679_attr__value: "0x080000000000000000000000", _Constant_21681_attr__value: "0x080000000000000000000000", _Constant_21683_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.33_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.33_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.33_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_23_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.33_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_21724_attr__value: "0x080000000000000000000000", _Constant_21726_attr__value: "0x080000000000000000000000", _Constant_21728_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.33_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.33_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.33_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.33_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.33_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.33_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_21781_attr__value: "0x080000000000000000000000", __single_transformer_blocks.33_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_21784_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.33_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.34_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.34_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.34_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.34_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.34_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.34_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.34_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.34_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.34_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.34_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.34_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.34_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.34_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.34_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.34_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_21856_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_21860_attr__value: "0x080000000000000000000000", _Constant_21863_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_5_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_21867_attr__value: "0x080000000000000000000000", _Constant_21870_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_21874_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.34_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.34_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.34_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.34_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.34_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.34_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.34_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_21918_attr__value: "0x080000000000000000000000", _Constant_21920_attr__value: "0x080000000000000000000000", _Constant_21922_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.34_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.34_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_17_attr__value: 
"0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.34_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.34_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_21963_attr__value: "0x080000000000000000000000", _Constant_21965_attr__value: "0x080000000000000000000000", _Constant_21967_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.34_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.34_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.34_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.34_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.34_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.34_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.34_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_22020_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_22023_attr__value: "0x080000000000000000000000", __single_transformer_blocks.34_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.35_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.35_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.35_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.35_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.35_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.35_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.35_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.35_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.35_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.35_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.35_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.35_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.35_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.35_attn_Constant_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.35_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.35_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_22095_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_22099_attr__value: "0x080000000000000000000000", _Constant_22102_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_22106_attr__value: "0x080000000000000000000000", _Constant_22109_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_22113_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.35_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.35_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.35_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.35_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.35_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.35_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.35_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_22157_attr__value: "0x080000000000000000000000", _Constant_22159_attr__value: "0x080000000000000000000000", 
_Constant_22161_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.35_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.35_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.35_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.35_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_22202_attr__value: "0x080000000000000000000000", _Constant_22204_attr__value: "0x080000000000000000000000", _Constant_22206_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.35_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.35_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.35_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.35_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.35_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.35_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.35_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_22259_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_22262_attr__value: "0x080000000000000000000000", __single_transformer_blocks.35_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.36_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.36_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_norm_Constant_2_attr__value: "0x080000000200000000000000", __single_transformer_blocks.36_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.36_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.36_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.36_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.36_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.36_norm_Constant_8_attr__value: "0x08000000803F", 
__single_transformer_blocks.36_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.36_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.36_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.36_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.36_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.36_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.36_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_22334_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_22338_attr__value: "0x080000000000000000000000", _Constant_22341_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_22345_attr__value: "0x080000000000000000000000", _Constant_22348_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_22352_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.36_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.36_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.36_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.36_attn_norm_k_Constant_1_attr__value: 
"0x08000000BD378635", __single_transformer_blocks.36_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.36_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.36_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_22396_attr__value: "0x080000000000000000000000", _Constant_22398_attr__value: "0x080000000000000000000000", _Constant_22400_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.36_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.36_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_20_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.36_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.36_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_22441_attr__value: "0x080000000000000000000000", _Constant_22443_attr__value: "0x080000000000000000000000", _Constant_22445_attr__value: 
"0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.36_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.36_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.36_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.36_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.36_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.36_attn_Constant_40_attr__value: "0x080000001800000000000000", _Constant_22498_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_22501_attr__value: "0x080000000000000000000000", __single_transformer_blocks.36_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.37_norm_Constant_attr__value: "0x080000000100000000000000", __single_transformer_blocks.37_norm_Constant_1_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_norm_Constant_2_attr__value: "0x080000000200000000000000", 
__single_transformer_blocks.37_norm_Constant_3_attr__value: "0x080000000300000000000000", __single_transformer_blocks.37_norm_Constant_4_attr__value: "0x080000000100000000000000", __single_transformer_blocks.37_norm_Constant_5_attr__value: "0x080000000200000000000000", __single_transformer_blocks.37_norm_Constant_6_attr__value: "0x080000000300000000000000", __single_transformer_blocks.37_norm_Constant_7_attr__value: "0x080000000100000000000000", __single_transformer_blocks.37_norm_Constant_8_attr__value: "0x08000000803F", __single_transformer_blocks.37_norm_Constant_9_attr__value: "0x080000000100000000000000", __single_transformer_blocks.37_act_mlp_Constant_attr__value: "0x08000000373D", __single_transformer_blocks.37_act_mlp_Constant_1_attr__value: "0x080000004C3F", __single_transformer_blocks.37_act_mlp_Constant_2_attr__value: "0x08000000803F", __single_transformer_blocks.37_act_mlp_Constant_3_attr__value: "0x08000000003F", __single_transformer_blocks.37_attn_Constant_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_1_attr__value: "0x080000000200000000000000", __single_transformer_blocks.37_attn_Constant_2_attr__value: "0x080000001800000000000000", _Constant_22573_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_3_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_4_attr__value: "0x080000001800000000000000", _Constant_22577_attr__value: "0x080000000000000000000000", _Constant_22580_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_6_attr__value: "0x080000001800000000000000", _Constant_22584_attr__value: "0x080000000000000000000000", _Constant_22587_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_7_attr__value: "0x08000000FFFFFFFFFFFFFFFF", 
__single_transformer_blocks.37_attn_Constant_8_attr__value: "0x080000001800000000000000", _Constant_22591_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_norm_q_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.37_attn_norm_q_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.37_attn_norm_q_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.37_attn_norm_k_Constant_attr__value: "0x0800000000000040", __single_transformer_blocks.37_attn_norm_k_Constant_1_attr__value: "0x08000000BD378635", __single_transformer_blocks.37_attn_norm_k_Constant_2_attr__value: "0x080000000000803F", __single_transformer_blocks.37_attn_Constant_9_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_10_attr__value: "0x080000000100000000000000", __single_transformer_blocks.37_attn_Constant_11_attr__value: "0x080000000200000000000000", _Constant_22635_attr__value: "0x080000000000000000000000", _Constant_22637_attr__value: "0x080000000000000000000000", _Constant_22639_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_12_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_13_attr__value: "0x080000000200000000000000", __single_transformer_blocks.37_attn_Constant_14_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.37_attn_Constant_15_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_16_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_17_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_18_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_19_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_20_attr__value: "0x080000000000000000000000", 
__single_transformer_blocks.37_attn_Constant_21_attr__value: "0x080000000300000000000000", __single_transformer_blocks.37_attn_Constant_22_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_23_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_24_attr__value: "0x080000000100000000000000", __single_transformer_blocks.37_attn_Constant_25_attr__value: "0x080000000200000000000000", _Constant_22680_attr__value: "0x080000000000000000000000", _Constant_22682_attr__value: "0x080000000000000000000000", _Constant_22684_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_26_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_27_attr__value: "0x080000000200000000000000", __single_transformer_blocks.37_attn_Constant_28_attr__value: "0x0800000001000000000000000100000000000000", __single_transformer_blocks.37_attn_Constant_29_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_30_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_31_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_32_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_33_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_34_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_35_attr__value: "0x080000000300000000000000", __single_transformer_blocks.37_attn_Constant_36_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_37_attr__value: "0x08000000FFFFFFFFFFFFFFFF", __single_transformer_blocks.37_attn_Constant_38_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __single_transformer_blocks.37_attn_Constant_39_attr__value: "0x080000000000803F", __single_transformer_blocks.37_attn_Constant_40_attr__value: 
"0x080000001800000000000000", _Constant_22737_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_attn_Constant_41_attr__value: "0x08000000FFFFFFFFFFFFFFFF", _Constant_22740_attr__value: "0x080000000000000000000000", __single_transformer_blocks.37_Constant_attr__value: "0x080000000100000000000000", __Constant_2_attr__value: "0x080000000100000000000000", __Constant_3_attr__value: "0x080000000100000000000000", __Constant_4_attr__value: "0x080000000000000000000000", __Constant_5_attr__value: "0x08000000FFFFFFFFFFFFFF7F", __Constant_6_attr__value: "0x080000000100000000000000", __norm_out_Constant_attr__value: "0x080000000100000000000000", __norm_out_Constant_1_attr__value: "0x080000000000000000000000", __norm_out_Constant_2_attr__value: "0x080000000100000000000000", __norm_out_Constant_3_attr__value: "0x080000000200000000000000", __norm_out_Constant_4_attr__value: "0x080000000100000000000000", __norm_out_Constant_5_attr__value: "0x080000000200000000000000", __norm_out_Constant_6_attr__value: "0x08000000803F", __norm_out_Constant_7_attr__value: "0x080000000100000000000000", __norm_out_Constant_8_attr__value: "0x080000000100000000000000" } } #-}