From 830163e90f8e70d7c83913f82d1340586e0c607a Mon Sep 17 00:00:00 2001 From: Nicolas Lang Date: Mon, 27 Nov 2023 13:19:26 +0100 Subject: [PATCH 1/6] added rng arguments to HMC function --- src/YM/YMfields.jl | 6 +++--- src/YM/YMhmc.jl | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/YM/YMfields.jl b/src/YM/YMfields.jl index c6e5433..d853c49 100644 --- a/src/YM/YMfields.jl +++ b/src/YM/YMfields.jl @@ -9,11 +9,11 @@ ### created: Thu Jul 15 15:16:47 2021 ### -function randomize!(f, lp::SpaceParm, ymws::YMworkspace) +function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng()) if ymws.ALG == SU2alg @timeit "Randomize SU(2) algebra field" begin - m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz) + m = Random.randn(curng, ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz) CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2!(f,m,lp) end @@ -23,7 +23,7 @@ function randomize!(f, lp::SpaceParm, ymws::YMworkspace) if ymws.ALG == SU3alg @timeit "Randomize SU(3) algebra field" begin - m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz) + m = Random.randn(curng, ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz) CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU3!(f,m,lp) end diff --git a/src/YM/YMhmc.jl b/src/YM/YMhmc.jl index c973875..81a93c0 100644 --- a/src/YM/YMhmc.jl +++ b/src/YM/YMhmc.jl @@ -58,13 +58,13 @@ function hamiltonian(mom, U, lp, gp, ymws) return K+V end -function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T +function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng()) where T @timeit "HMC trayectory" begin ymws.U1 .= U - randomize!(ymws.mom, lp, ymws) + randomize!(ymws.mom, lp, ymws; curng) hini = hamiltonian(ymws.mom, U, lp, gp, ymws) MD!(ymws.mom, U, int, lp, gp, ymws) @@ -78,7 +78,7 @@ function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspac end if (pacc < 1.0) - r = rand() + r = rand(rng) if (pacc < r) U .= ymws.U1 acc = false @@ -90,7 +90,7 @@ function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspac end return dh, acc end -HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T = HMC!(U, omf4(T, eps, ns), lp, gp, ymws; noacc=noacc) +HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng()) where T = HMC!(U, omf4(T, eps, ns), lp, gp, ymws; noacc=noacc, rng, curng) function MD!(mom, U, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}) where {NI, T <: AbstractFloat} From 094390c1eea6142799e508fa8c1dd59869726c2c Mon Sep 17 00:00:00 2001 From: Nicolas Lang Date: Fri, 2 May 2025 16:30:19 +0200 Subject: [PATCH 2/6] factor of 1/2 on spatial bnd links for plaquette action density (matches openQCD) --- src/YM/YMflow.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/YM/YMflow.jl b/src/YM/YMflow.jl index 42ed545..2ba3d23 100644 --- a/src/YM/YMflow.jl +++ b/src/YM/YMflow.jl @@ -303,6 +303,10 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws: for it in 1:lp.iL[end] Eslc[it,ipl] = 2*Etmp[it] end + if OBC ## Spatial plaquettes at time boundary count half (Luescher) + Eslc[1,ipl] = Etmp[1] + Eslc[end,ipl] = Etmp[end] + end end end @@ -403,6 +407,12 @@ function Eoft_clover(Eslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMwor for it in 1:lp.iL[end] Eslc[it,ipl1] = Etmp[it]/8 end + + ## tentative (weight 1/2 on spatial planes on boundary) + #if (B == BC_OPEN) && (ipl1 >= 4) + # Eslc[end,ipl1] = Etmp[end]/16 + # Eslc[1,ipl1] = Etmp[1]/16 + #end CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_et!(ymws.rm, ymws.frc2, lp) @@ -412,6 +422,12 @@ function Eoft_clover(Eslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMwor Eslc[it,ipl2] = Etmp[it]/8 end + ## tentative (weight 1/2 on spatial planes on boundary) + #if (B == BC_OPEN) && (ipl2 >= 4) + # Eslc[end,ipl2] = Etmp[end]/16 + # Eslc[1,ipl2] = Etmp[1]/16 + #end + return nothing end From 58685f63a243c06ab0a3b7209ccbc3225b4c448c Mon Sep 17 00:00:00 2001 From: Nicolas Lang Date: Tue, 20 May 2025 16:58:19 +0200 Subject: [PATCH 3/6] changed time-boundary of Eoft_clover and Eoft_plaq to match openQCD --- src/YM/YMflow.jl | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/YM/YMflow.jl b/src/YM/YMflow.jl index 8004653..92c8e2c 100644 --- a/src/YM/YMflow.jl +++ b/src/YM/YMflow.jl @@ -430,29 +430,25 @@ function Eoft_clover(Eslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMwor end Etmp .= reshape(Array(CUDA.reduce(+, ymws.rm;dims=tp)),lp.iL[end])/V3 for it in 1:lp.iL[end] - Eslc[it,ipl1] = Etmp[it]/8 + if (B == BC_OPEN) && (it == 1 || it == lp.iL[end]) + Eslc[it,ipl1] = 0.0 + else + Eslc[it,ipl1] = Etmp[it]/8 + end end - ## tentative (weight 1/2 on spatial planes on boundary) - #if (B == BC_OPEN) && (ipl1 >= 4) - # Eslc[end,ipl1] = Etmp[end]/16 - # Eslc[1,ipl1] = Etmp[1]/16 - #end - CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_et!(ymws.rm, ymws.frc2, lp) end Etmp .= reshape(Array(CUDA.reduce(+, ymws.rm;dims=tp)),lp.iL[end])/V3 for it in 1:lp.iL[end] - Eslc[it,ipl2] = Etmp[it]/8 + if (B == BC_OPEN) && (it == 1 || it == lp.iL[end]) + Eslc[it,ipl2] = 0.0 + else + Eslc[it,ipl2] = Etmp[it]/8 + end end - ## tentative (weight 1/2 on spatial planes on boundary) - #if (B == BC_OPEN) && (ipl2 >= 4) - # Eslc[end,ipl2] = Etmp[end]/16 - # Eslc[1,ipl2] = Etmp[1]/16 - #end - return nothing end From feacb1fdd23ff332e29047af430d8a8569566ad3 Mon Sep 17 00:00:00 2001 From: "Fernando P. Panadero" Date: Fri, 23 May 2025 17:54:11 +0200 Subject: [PATCH 4/6] Proper flow for OBC in fermions. --- src/Dirac/Dirac.jl | 5 +++-- src/Dirac/DiracIO.jl | 12 +++++++++--- src/Dirac/Diracfields.jl | 10 +++++++++- src/Dirac/Diracflow.jl | 36 ++++++++++++++++++++++++++---------- src/Dirac/Diracoper.jl | 12 +++++++++++- src/Solvers/CG.jl | 6 +++--- src/Solvers/Propagators.jl | 10 +++++++++- src/Spinors/Spinors.jl | 9 +++++---- 8 files changed, 75 insertions(+), 25 deletions(-) diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl index c11dc74..262ba42 100644 --- a/src/Dirac/Dirac.jl +++ b/src/Dirac/Dirac.jl @@ -1,9 +1,10 @@ ### ### "THE BEER-WARE LICENSE": -### Alberto Ramos and Carlos Pena wrote this file. As long as you retain this +### Alberto Ramos Carlos Pena and Fernando Panadero +### wrote this file. As long as you retain this ### notice you can do whatever you want with this stuff. If we meet some ### day, and you think this stuff is worth it, you can buy us a beer in -### return. +### return. ### ### file: Dirac.jl ### created: Thu Nov 18 17:20:24 2021 diff --git a/src/Dirac/DiracIO.jl b/src/Dirac/DiracIO.jl index 1342f70..a91c079 100644 --- a/src/Dirac/DiracIO.jl +++ b/src/Dirac/DiracIO.jl @@ -1,3 +1,12 @@ +### +### "THE BEER-WARE LICENSE": +### Fernando Panadero wrote this file based on Alberto Ramos' work. +### As long as you retain this notice you can do whatever you want with this stuff. If we meet some +### day, and you think this stuff is worth it, you can buy us a beer in +### return. +### +### file: DiracIO.jl +### """ read_prop(fname::String) @@ -73,7 +82,6 @@ function read_prop(fname::String) return CuArray(psicpu) end - """ save_prop(fname, psi, lp::SpaceParm, dpar::DiracParam; run::Union{Nothing,String}=nothing) @@ -131,8 +139,6 @@ function save_prop(fname::String, psi, lp::SpaceParm{4,M,B,D}, dpar::DiracParam; return nothing end - - """ read_dpar(fname::String) diff --git a/src/Dirac/Diracfields.jl b/src/Dirac/Diracfields.jl index 9274d11..86378cf 100644 --- a/src/Dirac/Diracfields.jl +++ b/src/Dirac/Diracfields.jl @@ -1,4 +1,12 @@ - +### +### "THE BEER-WARE LICENSE": +### Fernando Panadero wrote this file based on Alberto Ramos' work. +### As long as you retain this notice you can do whatever you want with this stuff. If we meet some +### day, and you think this stuff is worth it, you can buy us a beer in +### return. +### +### file: Diracfields.jl +### """ diff --git a/src/Dirac/Diracflow.jl b/src/Dirac/Diracflow.jl index ee4488e..575cda8 100644 --- a/src/Dirac/Diracflow.jl +++ b/src/Dirac/Diracflow.jl @@ -1,11 +1,24 @@ +### +### "THE BEER-WARE LICENSE": +### Fernando Panadero wrote this file based on Alberto Ramos' work. +### As long as you retain this notice you can do whatever you want with this stuff. If we meet some +### day, and you think this stuff is worth it, you can buy us a beer in +### return. +### +### file: Diracflow.jl +### import ..YM.flw, ..YM.force_gauge, ..YM.flw_adapt +""" + flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) +Integrates the flow equations with the integration scheme defined by `int` performing `ns` steps with fixed step size. The configuration `U` is overwritten. +""" function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} @timeit "Integrating flow equations" begin for i in 1:ns - force_gauge(ymws, U, int.c0, 1, gp, lp) + force_gauge_flw(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -18,7 +31,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D U .= expm.(U, ymws.mom, 2*eps*int.r) for k in 1:NI - force_gauge(ymws, U, int.c0, 1, gp, lp) + force_gauge_flw(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -41,14 +54,12 @@ flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp: """ function backflow(psi, U, Dt, nsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) -Performs the integration of the adjoint flow for the fermion field, according to 1302.5246. The fermion field must me that of the time-slice Dt and is flowed back to the first time-slice -nsave is the total number of gauge fields saved in the process +Performs the integration of the adjoint flow for the fermion field, according to 1302.5246. The fermion field must me that of the time-slice Dt and is flowed back to the first time-slice. +nsave is the total number of gauge fields saved in the process. The default integrator is wfl_rk3(Float64,0.01,1.0) with adaptive steps, it has to be order 3 rk but in can be zfl. """ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm,int::FlowIntr, ymws::YMworkspace, dws::DiracWorkspace) - # Default integrator is wfl_rk3(Float64,0.01,1.0), it has to be order 3 rk but in can be zfl - @timeit "Backflow integration" begin @timeit "GPU to CPU" U0 = Array(U) @@ -103,7 +114,7 @@ backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::Space """ function bflw_step!(U, psi, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) -Performs ONE backstep in psi, from t to t-\eps. U is supposed to be the one in t-\eps and is left unchanged. So far, int has to be rk4 +Performs ONE backstep in psi, from t to t-\eps. U is supposed to be the one in t-\eps and is left unchanged. So far, int has to be rk3 """ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) @@ -111,7 +122,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam @timeit "GPU to CPU" V = Array(U) - force_gauge(ymws, U, int.c0, 1, gp, lp) + force_gauge_flw(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -120,7 +131,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam ymws.mom .= ymws.frc1 U .= expm.(U, ymws.mom, 2*eps*int.r) - force_gauge(ymws, U, int.c0, 1, gp, lp) + force_gauge_flw(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -133,7 +144,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam @timeit "CPU to GPU" copyto!(U,V) - force_gauge(ymws, U, int.c0, 1, gp, lp) + force_gauge_flw(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -155,6 +166,11 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam end +""" + flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) + +Integrates the flow equations with the integration scheme defined by `int` using the adaptive step size integrator up to `tend` with the tolerance defined in `int`. The configuration `U` is overwritten. +""" function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} eps = epsini diff --git a/src/Dirac/Diracoper.jl b/src/Dirac/Diracoper.jl index d1d83d4..52c9cb6 100644 --- a/src/Dirac/Diracoper.jl +++ b/src/Dirac/Diracoper.jl @@ -1,4 +1,14 @@ - +### +### "THE BEER-WARE LICENSE": +### Alberto Ramos and Fernando Panadero +### wrote this file. As long as you retain this +### notice you can do whatever you want with this stuff. If we meet some +### day, and you think this stuff is worth it, you can buy us a beer in +### return. +### +### file: Diracoper.jl +### created: Thu Nov 18 17:20:24 2021 +### diff --git a/src/Solvers/CG.jl b/src/Solvers/CG.jl index ffec5d4..539fa26 100644 --- a/src/Solvers/CG.jl +++ b/src/Solvers/CG.jl @@ -1,9 +1,9 @@ ### ### "THE BEER-WARE LICENSE": -### Alberto Ramos wrote this file. As long as you retain this +### Alberto Ramos and Fernando Panadero wrote this file. As long as you retain this ### notice you can do whatever you want with this stuff. If we meet some -### day, and you think this stuff is worth it, you can buy me a beer in -### return. +### day, and you think this stuff is worth it, you can buy us a beer in +### return. ### ### file: CG.jl ### created: Tue Nov 30 11:10:57 2021 diff --git a/src/Solvers/Propagators.jl b/src/Solvers/Propagators.jl index ad07d3a..62301f8 100644 --- a/src/Solvers/Propagators.jl +++ b/src/Solvers/Propagators.jl @@ -1,4 +1,12 @@ - +### +### "THE BEER-WARE LICENSE": +### Fernando Panadero wrote this file based on Alberto Ramos' work. +### As long as you retain this notice you can do whatever you want with this stuff. If we meet some +### day, and you think this stuff is worth it, you can buy us a beer in +### return. +### +### file: Propagators.jl +### """ diff --git a/src/Spinors/Spinors.jl b/src/Spinors/Spinors.jl index 2058db8..b0f925b 100644 --- a/src/Spinors/Spinors.jl +++ b/src/Spinors/Spinors.jl @@ -1,9 +1,10 @@ ### ### "THE BEER-WARE LICENSE": -### Alberto Ramos wrote this file. As long as you retain this -### notice you can do whatever you want with this stuff. If we meet some -### day, and you think this stuff is worth it, you can buy me a beer in -### return. +### Alberto Ramos Carlos Pena and Fernando Panadero +### wrote this file. As long as you retain this +### notice you can do whatever you want with this stuff. If we meet some +### day, and you think this stuff is worth it, you can buy us a beer in +### return. ### ### file: Spinor.jl ### created: Wed Nov 17 13:00:31 2021 From 9ba07b506aa4d8919485692be266c81bb5eefd36 Mon Sep 17 00:00:00 2001 From: Nicolas Lang Date: Mon, 26 May 2025 12:48:04 +0200 Subject: [PATCH 5/6] fixed the OBC branch in the Zeuthen term - now results match openQCD --- src/YM/YM.jl | 2 +- src/YM/YMflow.jl | 30 ++++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/YM/YM.jl b/src/YM/YM.jl index 5578730..8f862a4 100644 --- a/src/YM/YM.jl +++ b/src/YM/YM.jl @@ -165,7 +165,7 @@ include("YMfields.jl") export randomize!, zero!, norm2 include("YMact.jl") -export krnl_plaq!, force_gauge, force_gauge_flw, force_wilson +export krnl_plaq!, force_gauge, force_gauge_flw, force_wilson, bnd_rescale_flw! include("YMhmc.jl") export gauge_action, hamiltonian, plaquette, HMC!, MD! diff --git a/src/YM/YMflow.jl b/src/YM/YMflow.jl index 92c8e2c..62abde1 100644 --- a/src/YM/YMflow.jl +++ b/src/YM/YMflow.jl @@ -127,6 +127,22 @@ function add_zth_term(ymws::YMworkspace, U, lp) return nothing end + +""" + function rescale_bnd(ymws::YMworkspace, lp) + +Rescales the time-boundary of ymws.frc1 by a factor of 2.0 (needed for open BC) +""" +function rescale_bnd(ymws::YMworkspace, lp) + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz bnd_rescale_flw!(ymws.frc1,lp::SpaceParm) + end + + return nothing +end + + function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::SpaceParm{N,M,B,D}) where {TA,TG,N,M,B,D} @inbounds begin @@ -155,12 +171,14 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S end end if OBC - if (it > 1) && (it < lp.iL[end]) + if (id < N) frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + projalg(U[b,id,r]*X/U[b,id,r])) - elseif ((it == lp.iL[end]) || (it == 1)) && (id < N) + elseif (it > 1) && (it < (lp.iL[end]-1)) frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + projalg(U[b,id,r]*X/U[b,id,r])) + elseif (it == 1) || (it == (lp.iL[end]-1)) + frc2[b,id,r] = frc[b,id,r] end else frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + @@ -171,6 +189,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S return nothing end + + """ function flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) @@ -204,18 +224,20 @@ flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMwor function flw(U, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace) where {NI,T,N,M,D} @timeit "Integrating flow equations" begin for i in 1:ns - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) end + rescale_bnd(ymws, lp) ymws.mom .= ymws.frc1 U .= expm.(U, ymws.mom, 2*eps*int.r) for k in 1:NI - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) end + rescale_bnd(ymws, lp) ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1 U .= expm.(U, ymws.mom, 2*eps) end From 51aa3a94f40cb742cceaed90f06682693f8109e3 Mon Sep 17 00:00:00 2001 From: "Fernando P. Panadero" Date: Tue, 27 May 2025 10:52:20 +0200 Subject: [PATCH 6/6] Tested version of OBC. Zth methods split. --- src/Dirac/Diracflow.jl | 95 +++++++++++++++++++++++++++++++++++++++--- src/YM/YMact.jl | 1 - src/YM/YMflow.jl | 86 ++++++++++++++++++++++++++------------ 3 files changed, 150 insertions(+), 32 deletions(-) diff --git a/src/Dirac/Diracflow.jl b/src/Dirac/Diracflow.jl index 575cda8..cfd2c65 100644 --- a/src/Dirac/Diracflow.jl +++ b/src/Dirac/Diracflow.jl @@ -18,7 +18,7 @@ Integrates the flow equations with the integration scheme defined by `int` perfo function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} @timeit "Integrating flow equations" begin for i in 1:ns - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -31,7 +31,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D U .= expm.(U, ymws.mom, 2*eps*int.r) for k in 1:NI - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -51,6 +51,43 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D end flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, int.eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) +function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T,N,M,D} + @timeit "Integrating flow equations" begin + for i in 1:ns + force_gauge(ymws, U, int.c0, 1, gp, lp) + + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + Nablanabla!(dws.sAp, U, psi, dpar, dws, lp) + psi .= psi + 2*int.r*eps*dws.sAp + + ymws.mom .= ymws.frc1 + U .= expm.(U, ymws.mom, 2*eps*int.r) + + for k in 1:NI + force_gauge(ymws, U, int.c0, 1, gp, lp) + + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + Nablanabla!(dws.sp, U, psi, dpar, dws, lp) + dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp + psi .= psi + 2*eps*dws.sAp + + ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1 + U .= expm.(U, ymws.mom, 2*eps) + end + end + end + + return nothing +end + """ function backflow(psi, U, Dt, nsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) @@ -122,7 +159,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam @timeit "GPU to CPU" V = Array(U) - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -131,7 +168,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam ymws.mom .= ymws.frc1 U .= expm.(U, ymws.mom, 2*eps*int.r) - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -144,7 +181,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam @timeit "CPU to GPU" copyto!(U,V) - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -165,6 +202,54 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam return nothing end +function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace, dws::DiracWorkspace) where {N,M,D} + + @timeit "Backflow step" begin + + @timeit "GPU to CPU" V = Array(U) + + force_gauge(ymws, U, int.c0, 1, gp, lp) + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + ymws.mom .= ymws.frc1 + U .= expm.(U, ymws.mom, 2*eps*int.r) + + force_gaugew(ymws, U, int.c0, 1, gp, lp) + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + ymws.mom .= int.e0[1].*ymws.mom .+ int.e1[1].*ymws.frc1 + U .= expm.(U, ymws.mom, 2*eps) + + Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp) + + @timeit "CPU to GPU" copyto!(U,V) + + force_gauge(ymws, U, int.c0, 1, gp, lp) + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + U .= expm.(U, ymws.frc1, 2*eps*int.r) + + Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp) + dws.sAp .= psi + (8/9)*dws.sAp + + @timeit "CPU to GPU" copyto!(U,V) + + Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp) + psi .= (1/4)*psi + dws.sp + dws.sAp + + end + + return nothing +end """ flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) diff --git a/src/YM/YMact.jl b/src/YM/YMact.jl index 272fe91..fa6fdae 100644 --- a/src/YM/YMact.jl +++ b/src/YM/YMact.jl @@ -1189,7 +1189,6 @@ function force_gauge_flw(ymws::YMworkspace, U, c0, cG, gp::GaugeParm, lp::SpaceP return nothing end - """ function force_wilson(ymws::YMworkspace, U, gp::GaugeParm, lp::SpaceParm) diff --git a/src/YM/YMflow.jl b/src/YM/YMflow.jl index 62abde1..dbcbf4c 100644 --- a/src/YM/YMflow.jl +++ b/src/YM/YMflow.jl @@ -142,7 +142,63 @@ function rescale_bnd(ymws::YMworkspace, lp) return nothing end +function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::SpaceParm{N,M,BC_OPEN,D}) where {TA,TG,N,M,D} + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + it = point_time((b, r), lp) + + @inbounds for id in 1:N + bu, ru = up((b,r), id, lp) + bd, rd = dw((b,r), id, lp) + + X = frc[bu,id,ru] + Y = frc[bd,id,rd] + Ud = U[bd,id,rd] + + if (id < N) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + elseif (it > 1) && (it < (lp.iL[end]-1)) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + elseif (it == 1) || (it == (lp.iL[end]-1)) + frc2[b,id,r] = frc[b,id,r] + end + end + end + return nothing +end + +function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {TA,TG,N,M,D} + + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + it = point_time((b, r), lp) + + @inbounds for id in 1:N + bu, ru = up((b,r), id, lp) + bd, rd = dw((b,r), id, lp) + + X = frc[bu,id,ru] + Y = frc[bd,id,rd] + Ud = U[bd,id,rd] + + if (it > 1) && (it < lp.iL[end]) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + elseif (it == lp.iL[end]) && (id < N) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + else + frc2[b,id,r] = frc[b,id,r] + end + end + end + return nothing +end function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::SpaceParm{N,M,B,D}) where {TA,TG,N,M,B,D} @inbounds begin @@ -150,9 +206,6 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S r = Int64(CUDA.blockIdx().x) it = point_time((b, r), lp) - SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) - OBC = (B == BC_OPEN) - @inbounds for id in 1:N bu, ru = up((b,r), id, lp) bd, rd = dw((b,r), id, lp) @@ -161,29 +214,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S Y = frc[bd,id,rd] Ud = U[bd,id,rd] - if SFBC - if (it > 1) && (it < lp.iL[end]) - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - elseif (it == lp.iL[end]) && (id < N) - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - end - end - if OBC - if (id < N) - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - elseif (it > 1) && (it < (lp.iL[end]-1)) - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - elseif (it == 1) || (it == (lp.iL[end]-1)) - frc2[b,id,r] = frc[b,id,r] - end - else - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - end + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) end end return nothing @@ -224,11 +256,13 @@ flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMwor function flw(U, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace) where {NI,T,N,M,D} @timeit "Integrating flow equations" begin for i in 1:ns + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) end rescale_bnd(ymws, lp) + ymws.mom .= ymws.frc1 U .= expm.(U, ymws.mom, 2*eps*int.r)