Merge branch 'master' into 'master'

Master

See merge request alramos/latticegpu.jl!7
This commit is contained in:
Alberto Ramos Martinez 2025-05-27 12:45:07 +00:00
commit 3a0d8a3a61
13 changed files with 264 additions and 60 deletions

View file

@ -1,9 +1,10 @@
###
### "THE BEER-WARE LICENSE":
### Alberto Ramos and Carlos Pena wrote this file. As long as you retain this
### Alberto Ramos Carlos Pena and Fernando Panadero
### wrote this file. As long as you retain this
### notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy us a beer in
### return. <alberto.ramos@cern.ch> <carlos.pena@uam.es>
### return. <alberto.ramos@cern.ch> <carlos.pena@uam.es> <fernando.p@csic.es>
###
### file: Dirac.jl
### created: Thu Nov 18 17:20:24 2021

View file

@ -1,3 +1,12 @@
###
### "THE BEER-WARE LICENSE":
### Fernando Panadero wrote this file based on Alberto Ramos' work.
### As long as you retain this notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy us a beer in
### return. <alberto.ramos@cern.ch> <fernando.p@csic.es>
###
### file: DiracIO.jl
###
"""
read_prop(fname::String)
@ -73,7 +82,6 @@ function read_prop(fname::String)
return CuArray(psicpu)
end
"""
save_prop(fname, psi, lp::SpaceParm, dpar::DiracParam; run::Union{Nothing,String}=nothing)
@ -131,8 +139,6 @@ function save_prop(fname::String, psi, lp::SpaceParm{4,M,B,D}, dpar::DiracParam;
return nothing
end
"""
read_dpar(fname::String)

View file

@ -1,4 +1,12 @@
###
### "THE BEER-WARE LICENSE":
### Fernando Panadero wrote this file based on Alberto Ramos' work.
### As long as you retain this notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy us a beer in
### return. <alberto.ramos@cern.ch> <fernando.p@csic.es>
###
### file: Diracfields.jl
###
"""

View file

@ -1,7 +1,20 @@
###
### "THE BEER-WARE LICENSE":
### Fernando Panadero wrote this file based on Alberto Ramos' work.
### As long as you retain this notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy us a beer in
### return. <alberto.ramos@cern.ch> <fernando.p@csic.es>
###
### file: Diracflow.jl
###
import ..YM.flw, ..YM.force_gauge, ..YM.flw_adapt
"""
flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
Integrates the flow equations with the integration scheme defined by `int` performing `ns` steps with fixed step size. The configuration `U` is overwritten.
"""
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
@timeit "Integrating flow equations" begin
for i in 1:ns
@ -38,17 +51,52 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D
end
flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, int.eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T,N,M,D}
@timeit "Integrating flow equations" begin
for i in 1:ns
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
rescale_bnd(ymws, lp)
Nablanabla!(dws.sAp, U, psi, dpar, dws, lp)
psi .= psi + 2*int.r*eps*dws.sAp
ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, 2*eps*int.r)
for k in 1:NI
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
rescale_bnd(ymws, lp)
Nablanabla!(dws.sp, U, psi, dpar, dws, lp)
dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
psi .= psi + 2*eps*dws.sAp
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
end
end
end
return nothing
end
"""
function backflow(psi, U, Dt, nsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
Performs the integration of the adjoint flow for the fermion field, according to 1302.5246. The fermion field must me that of the time-slice Dt and is flowed back to the first time-slice
nsave is the total number of gauge fields saved in the process
Performs the integration of the adjoint flow for the fermion field, according to 1302.5246. The fermion field must me that of the time-slice Dt and is flowed back to the first time-slice.
nsave is the total number of gauge fields saved in the process. The default integrator is wfl_rk3(Float64,0.01,1.0) with adaptive steps, it has to be order 3 rk but in can be zfl.
"""
function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm,int::FlowIntr, ymws::YMworkspace, dws::DiracWorkspace)
# Default integrator is wfl_rk3(Float64,0.01,1.0), it has to be order 3 rk but in can be zfl
@timeit "Backflow integration" begin
@timeit "GPU to CPU" U0 = Array(U)
@ -103,7 +151,7 @@ backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::Space
"""
function bflw_step!(U, psi, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
Performs ONE backstep in psi, from t to t-\eps. U is supposed to be the one in t-\eps and is left unchanged. So far, int has to be rk4
Performs ONE backstep in psi, from t to t-\eps. U is supposed to be the one in t-\eps and is left unchanged. So far, int has to be rk3
"""
function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
@ -154,7 +202,60 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
return nothing
end
function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace, dws::DiracWorkspace) where {N,M,D}
@timeit "Backflow step" begin
@timeit "GPU to CPU" V = Array(U)
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
rescale_bnd(ymws, lp)
ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, 2*eps*int.r)
force_gaugew(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
rescale_bnd(ymws, lp)
ymws.mom .= int.e0[1].*ymws.mom .+ int.e1[1].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp)
@timeit "CPU to GPU" copyto!(U,V)
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
rescale_bnd(ymws, lp)
U .= expm.(U, ymws.frc1, 2*eps*int.r)
Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp)
dws.sAp .= psi + (8/9)*dws.sAp
@timeit "CPU to GPU" copyto!(U,V)
Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp)
psi .= (1/4)*psi + dws.sp + dws.sAp
end
return nothing
end
"""
flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
Integrates the flow equations with the integration scheme defined by `int` using the adaptive step size integrator up to `tend` with the tolerance defined in `int`. The configuration `U` is overwritten.
"""
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
eps = epsini

View file

@ -1,4 +1,14 @@
###
### "THE BEER-WARE LICENSE":
### Alberto Ramos and Fernando Panadero
### wrote this file. As long as you retain this
### notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy us a beer in
### return. <alberto.ramos@cern.ch> <fernando.p@csic.es>
###
### file: Diracoper.jl
### created: Thu Nov 18 17:20:24 2021
###

View file

@ -1,9 +1,9 @@
###
### "THE BEER-WARE LICENSE":
### Alberto Ramos wrote this file. As long as you retain this
### Alberto Ramos and Fernando Panadero wrote this file. As long as you retain this
### notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy me a beer in
### return. <alberto.ramos@cern.ch>
### day, and you think this stuff is worth it, you can buy us a beer in
### return. <alberto.ramos@cern.ch> <fernando.p@csic.es>
###
### file: CG.jl
### created: Tue Nov 30 11:10:57 2021

View file

@ -1,4 +1,12 @@
###
### "THE BEER-WARE LICENSE":
### Fernando Panadero wrote this file based on Alberto Ramos' work.
### As long as you retain this notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy us a beer in
### return. <alberto.ramos@cern.ch> <fernando.p@csic.es>
###
### file: Propagators.jl
###
"""

View file

@ -1,9 +1,10 @@
###
### "THE BEER-WARE LICENSE":
### Alberto Ramos wrote this file. As long as you retain this
### Alberto Ramos Carlos Pena and Fernando Panadero
### wrote this file. As long as you retain this
### notice you can do whatever you want with this stuff. If we meet some
### day, and you think this stuff is worth it, you can buy me a beer in
### return. <alberto.ramos@cern.ch>
### day, and you think this stuff is worth it, you can buy us a beer in
### return. <alberto.ramos@cern.ch> <carlos.pena@uam.es> <fernando.p@csic.es>
###
### file: Spinor.jl
### created: Wed Nov 17 13:00:31 2021

View file

@ -165,7 +165,7 @@ include("YMfields.jl")
export randomize!, zero!, norm2
include("YMact.jl")
export krnl_plaq!, force_gauge, force_gauge_flw, force_wilson
export krnl_plaq!, force_gauge, force_gauge_flw, force_wilson, bnd_rescale_flw!
include("YMhmc.jl")
export gauge_action, hamiltonian, plaquette, HMC!, MD!

View file

@ -1189,7 +1189,6 @@ function force_gauge_flw(ymws::YMworkspace, U, c0, cG, gp::GaugeParm, lp::SpaceP
return nothing
end
"""
function force_wilson(ymws::YMworkspace, U, gp::GaugeParm, lp::SpaceParm)

View file

@ -9,16 +9,17 @@
### created: Thu Jul 15 15:16:47 2021
###
"""
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng())
Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
"""
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng())
if ymws.ALG == SU2alg
@timeit "Randomize SU(2) algebra field" begin
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
m = Random.randn(curng, ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU2!(f,m,lp)
end
@ -28,7 +29,7 @@ function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
if ymws.ALG == SU3alg
@timeit "Randomize SU(3) algebra field" begin
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz)
m = Random.randn(curng, ymws.PRC, lp.bsz,lp.ndim,8,lp.rsz)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_SU3!(f,m,lp)
end

View file

@ -127,16 +127,57 @@ function add_zth_term(ymws::YMworkspace, U, lp)
return nothing
end
function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::SpaceParm{N,M,B,D}) where {TA,TG,N,M,B,D}
"""
function rescale_bnd(ymws::YMworkspace, lp)
Rescales the time-boundary of ymws.frc1 by a factor of 2.0 (needed for open BC)
"""
function rescale_bnd(ymws::YMworkspace, lp)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz bnd_rescale_flw!(ymws.frc1,lp::SpaceParm)
end
return nothing
end
function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::SpaceParm{N,M,BC_OPEN,D}) where {TA,TG,N,M,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
it = point_time((b, r), lp)
@inbounds for id in 1:N
bu, ru = up((b,r), id, lp)
bd, rd = dw((b,r), id, lp)
X = frc[bu,id,ru]
Y = frc[bd,id,rd]
Ud = U[bd,id,rd]
if (id < N)
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
elseif (it > 1) && (it < (lp.iL[end]-1))
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
elseif (it == 1) || (it == (lp.iL[end]-1))
frc2[b,id,r] = frc[b,id,r]
end
end
end
return nothing
end
function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {TA,TG,N,M,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
it = point_time((b, r), lp)
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
OBC = (B == BC_OPEN)
@inbounds for id in 1:N
bu, ru = up((b,r), id, lp)
bd, rd = dw((b,r), id, lp)
@ -145,31 +186,42 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
Y = frc[bd,id,rd]
Ud = U[bd,id,rd]
if SFBC
if (it > 1) && (it < lp.iL[end])
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
elseif (it == lp.iL[end]) && (id < N)
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
end
end
if OBC
if (it > 1) && (it < lp.iL[end])
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
elseif ((it == lp.iL[end]) || (it == 1)) && (id < N)
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
end
else
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
frc2[b,id,r] = frc[b,id,r]
end
end
end
return nothing
end
function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::SpaceParm{N,M,B,D}) where {TA,TG,N,M,B,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
it = point_time((b, r), lp)
@inbounds for id in 1:N
bu, ru = up((b,r), id, lp)
bd, rd = dw((b,r), id, lp)
X = frc[bu,id,ru]
Y = frc[bd,id,rd]
Ud = U[bd,id,rd]
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
end
end
return nothing
end
"""
function flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)
@ -204,18 +256,22 @@ flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMwor
function flw(U, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace) where {NI,T,N,M,D}
@timeit "Integrating flow equations" begin
for i in 1:ns
force_gauge_flw(ymws, U, int.c0, 1, gp, lp)
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
rescale_bnd(ymws, lp)
ymws.mom .= ymws.frc1
U .= expm.(U, ymws.mom, 2*eps*int.r)
for k in 1:NI
force_gauge_flw(ymws, U, int.c0, 1, gp, lp)
force_gauge(ymws, U, int.c0, 1, gp, lp)
if int.add_zth
add_zth_term(ymws::YMworkspace, U, lp)
end
rescale_bnd(ymws, lp)
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
end
@ -328,6 +384,10 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
for it in 1:lp.iL[end]
Eslc[it,ipl] = 2*Etmp[it]
end
if OBC ## Spatial plaquettes at time boundary count half (Luescher)
Eslc[1,ipl] = Etmp[1]
Eslc[end,ipl] = Etmp[end]
end
end
end
@ -426,16 +486,24 @@ function Eoft_clover(Eslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMwor
end
Etmp .= reshape(Array(CUDA.reduce(+, ymws.rm;dims=tp)),lp.iL[end])/V3
for it in 1:lp.iL[end]
if (B == BC_OPEN) && (it == 1 || it == lp.iL[end])
Eslc[it,ipl1] = 0.0
else
Eslc[it,ipl1] = Etmp[it]/8
end
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_et!(ymws.rm, ymws.frc2, lp)
end
Etmp .= reshape(Array(CUDA.reduce(+, ymws.rm;dims=tp)),lp.iL[end])/V3
for it in 1:lp.iL[end]
if (B == BC_OPEN) && (it == 1 || it == lp.iL[end])
Eslc[it,ipl2] = 0.0
else
Eslc[it,ipl2] = Etmp[it]/8
end
end
return nothing
end

View file

@ -68,18 +68,19 @@ function hamiltonian(mom, U, lp, gp, ymws)
return K+V
end
"""
HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace; noacc=false)
HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng())
Performs a HMC step (molecular dynamics integration and accept/reject step). The configuration `U` is updated and function returns the energy violation and if the configuration was accepted in a tuple.
"""
function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T
function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng()) where T
@timeit "HMC trayectory" begin
ymws.U1 .= U
randomize!(ymws.mom, lp, ymws)
randomize!(ymws.mom, lp, ymws; curng)
hini = hamiltonian(ymws.mom, U, lp, gp, ymws)
MD!(ymws.mom, U, int, lp, gp, ymws)
@ -93,7 +94,7 @@ function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspac
end
if (pacc < 1.0)
r = rand()
r = rand(rng)
if (pacc < r)
U .= ymws.U1
acc = false
@ -105,7 +106,7 @@ function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspac
end
return dh, acc
end
HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false) where T = HMC!(U, omf4(T, eps, ns), lp, gp, ymws; noacc=noacc)
HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng()) where T = HMC!(U, omf4(T, eps, ns), lp, gp, ymws; noacc=noacc, rng, curng)
"""
function MD!(mom, U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)