LLIR ; ModuleID = 'LLVMDialectModule' source_filename = "LLVMDialectModule" define void @add_kernel_0d1d2d3de(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, i32 %3) local_unnamed_addr !dbg !5 { %5 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !8 %6 = shl i32 %5, 2, !dbg !8 %7 = and i32 %6, 508, !dbg !8 %8 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #1, !dbg !9 %9 = shl i32 %8, 10, !dbg !10 %10 = or i32 %9, %7, !dbg !11 %11 = or i32 %10, 512, !dbg !11 %12 = icmp slt i32 %10, %3, !dbg !12 %13 = icmp slt i32 %11, %3, !dbg !12 %14 = sext i32 %10 to i64, !dbg !13 %15 = getelementptr float, ptr addrspace(1) %0, i64 %14, !dbg !13 %16 = sext i32 %11 to i64, !dbg !13 %17 = getelementptr float, ptr addrspace(1) %0, i64 %16, !dbg !13 %18 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];", "=r,=r,=r,=r,l,b"(ptr addrspace(1) %15, i1 %12) #1, !dbg !14 %19 = extractvalue { i32, i32, i32, i32 } %18, 0, !dbg !14 %20 = extractvalue { i32, i32, i32, i32 } %18, 1, !dbg !14 %21 = extractvalue { i32, i32, i32, i32 } %18, 2, !dbg !14 %22 = extractvalue { i32, i32, i32, i32 } %18, 3, !dbg !14 %23 = bitcast i32 %19 to float, !dbg !14 %24 = bitcast i32 %20 to float, !dbg !14 %25 = bitcast i32 %21 to float, !dbg !14 %26 = bitcast i32 %22 to float, !dbg !14 %27 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];", "=r,=r,=r,=r,l,b"(ptr addrspace(1) %17, i1 %13) #1, !dbg !14 %28 = extractvalue { i32, i32, i32, i32 } %27, 0, !dbg !14 %29 = extractvalue { i32, i32, i32, i32 } %27, 1, !dbg !14 %30 = extractvalue { i32, i32, i32, i32 } %27, 2, !dbg !14 %31 = extractvalue { i32, i32, i32, i32 } %27, 3, !dbg !14 %32 = bitcast i32 %28 to float, !dbg !14 %33 = bitcast i32 %29 to float, !dbg !14 %34 = bitcast i32 %30 to float, !dbg !14 %35 = bitcast i32 %31 to float, !dbg !14 %36 = getelementptr float, ptr addrspace(1) %1, i64 %14, !dbg !15 %37 = getelementptr float, ptr addrspace(1) %1, i64 %16, !dbg !15 %38 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];", "=r,=r,=r,=r,l,b"(ptr addrspace(1) %36, i1 %12) #1, !dbg !16 %39 = extractvalue { i32, i32, i32, i32 } %38, 0, !dbg !16 %40 = extractvalue { i32, i32, i32, i32 } %38, 1, !dbg !16 %41 = extractvalue { i32, i32, i32, i32 } %38, 2, !dbg !16 %42 = extractvalue { i32, i32, i32, i32 } %38, 3, !dbg !16 %43 = bitcast i32 %39 to float, !dbg !16 %44 = bitcast i32 %40 to float, !dbg !16 %45 = bitcast i32 %41 to float, !dbg !16 %46 = bitcast i32 %42 to float, !dbg !16 %47 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];", "=r,=r,=r,=r,l,b"(ptr addrspace(1) %37, i1 %13) #1, !dbg !16 %48 = extractvalue { i32, i32, i32, i32 } %47, 0, !dbg !16 %49 = extractvalue { i32, i32, i32, i32 } %47, 1, !dbg !16 %50 = extractvalue { i32, i32, i32, i32 } %47, 2, !dbg !16 %51 = extractvalue { i32, i32, i32, i32 } %47, 3, !dbg !16 %52 = bitcast i32 %48 to float, !dbg !16 %53 = bitcast i32 %49 to float, !dbg !16 %54 = bitcast i32 %50 to float, !dbg !16 %55 = bitcast i32 %51 to float, !dbg !16 %56 = fadd float %23, %43, !dbg !17 %57 = fadd float %24, %44, !dbg !17 %58 = fadd float %25, %45, !dbg !17 %59 = fadd float %26, %46, !dbg !17 %60 = fadd float %32, %52, !dbg !17 %61 = fadd float %33, %53, !dbg !17 %62 = fadd float %34, %54, !dbg !17 %63 = fadd float %35, %55, !dbg !17 %64 = getelementptr float, ptr addrspace(1) %2, i64 %14, !dbg !18 %65 = getelementptr float, ptr addrspace(1) %2, i64 %16, !dbg !18 %66 = bitcast float %56 to i32, !dbg !19 %67 = bitcast float %57 to i32, !dbg !19 %68 = bitcast float %58 to i32, !dbg !19 %69 = bitcast float %59 to i32, !dbg !19 tail call void asm sideeffect "@$5 st.global.v4.b32 [ $4 + 0 ], { $0, $1, $2, $3 };", "r,r,r,r,l,b"(i32 %66, i32 %67, i32 %68, i32 %69, ptr addrspace(1) %64, i1 %12) #1, !dbg !19 %70 = bitcast float %60 to i32, !dbg !19 %71 = bitcast float %61 to i32, !dbg !19 %72 = bitcast float %62 to i32, !dbg !19 %73 = bitcast float %63 to i32, !dbg !19 tail call void asm sideeffect "@$5 st.global.v4.b32 [ $4 + 0 ], { $0, $1, $2, $3 };", "r,r,r,r,l,b"(i32 %70, i32 %71, i32 %72, i32 %73, ptr addrspace(1) %65, i1 %13) #1, !dbg !19 ret void, !dbg !20 } ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0 attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { nounwind } !llvm.module.flags = !{!0} !llvm.dbg.cu = !{!1} !nvvm.annotations = !{!3, !4, !4, !3} !0 = !{i32 2, !"Debug Info Version", i32 3} !1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) !2 = !DIFile(filename: "vector_addition.py", directory: "/home/kdidi/projects/triton/kd_scripts") !3 = !{ptr @add_kernel_0d1d2d3de, !"kernel", i32 1} !4 = !{ptr @add_kernel_0d1d2d3de, !"maxntidx", i32 128} !5 = distinct !DISubprogram(name: "add_kernel_0d1d2d3de", linkageName: "add_kernel_0d1d2d3de", scope: !2, file: !2, line: 5, type: !6, scopeLine: 5, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !1) !6 = !DISubroutineType(cc: DW_CC_normal, types: !7) !7 = !{} !8 = !DILocation(line: 8, column: 41, scope: !5) !9 = !DILocation(line: 6, column: 24, scope: !5) !10 = !DILocation(line: 7, column: 24, scope: !5) !11 = !DILocation(line: 8, column: 28, scope: !5) !12 = !DILocation(line: 9, column: 21, scope: !5) !13 = !DILocation(line: 10, column: 24, scope: !5) !14 = !DILocation(line: 10, column: 16, scope: !5) !15 = !DILocation(line: 11, column: 24, scope: !5) !16 = !DILocation(line: 11, column: 16, scope: !5) !17 = !DILocation(line: 12, column: 17, scope: !5) !18 = !DILocation(line: 13, column: 26, scope: !5) !19 = !DILocation(line: 13, column: 35, scope: !5) !20 = !DILocation(line: 13, column: 4, scope: !5)