mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-06-30 22:09:27 +02:00
Merge branch 'master' of igit.ific.uv.es:fernando.p.csic.es/latticegpu.jl into fix/flow_obc
This commit is contained in:
commit
349ff2405f
17 changed files with 585 additions and 165 deletions
|
@ -151,16 +151,17 @@ function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
|
|||
b = Int64(CUDA.threadIdx().x)
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if t == 0
|
||||
if t == 0
|
||||
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
|
||||
x[b,2,r,1] + im* x[b,2,r,2],
|
||||
x[b,3,r,1] + im* x[b,3,r,2]),p))
|
||||
elseif point_time((b,r),lp) == t
|
||||
x[b,2,r,1] + im* x[b,2,r,2],
|
||||
x[b,3,r,1] + im* x[b,3,r,2]),p))
|
||||
elseif point_time((b,r),lp) == t
|
||||
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
|
||||
x[b,2,r,1] + im* x[b,2,r,2],
|
||||
x[b,3,r,1] + im* x[b,3,r,2]),p))
|
||||
end
|
||||
|
||||
x[b,2,r,1] + im* x[b,2,r,2],
|
||||
x[b,3,r,1] + im* x[b,3,r,2]),p))
|
||||
else
|
||||
f[b,r] = 0.0*f[b,r]
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
|
@ -197,14 +198,15 @@ function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
|
|||
b = Int64(CUDA.threadIdx().x)
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if t == 0
|
||||
if t == 0
|
||||
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
|
||||
x[b,2,r,1] + im* x[b,2,r,2]),p))
|
||||
elseif point_time((b,r),lp) == t
|
||||
x[b,2,r,1] + im* x[b,2,r,2]),p))
|
||||
elseif point_time((b,r),lp) == t
|
||||
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
|
||||
x[b,2,r,1] + im* x[b,2,r,2]),p))
|
||||
end
|
||||
|
||||
x[b,2,r,1] + im* x[b,2,r,2]),p))
|
||||
else
|
||||
f[b,r] = 0.0*f[b,r]
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
|
|
|
@ -41,13 +41,13 @@ flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp:
|
|||
"""
|
||||
function backflow(psi, U, Dt, nsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
||||
Performs one step back in flow time for the fermion field, according to 1302.5246. The fermion field must me that of the time-slice Dt and is flowed back to the first time-slice
|
||||
Performs the integration of the adjoint flow for the fermion field, according to 1302.5246. The fermion field must be that of the time-slice Dt and is flowed back to the first time-slice
|
||||
nsave is the total number of gauge fields saved in the process
|
||||
|
||||
"""
|
||||
function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm,int::FlowIntr, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
||||
int = wfl_rk3(Float64,0.01,1.0) # Default integrator, it has to be order 3 rk but in can be zfl
|
||||
# Default integrator is wfl_rk3(Float64,0.01,1.0); it has to be an order 3 rk, but it can be zfl
|
||||
|
||||
@timeit "Backflow integration" begin
|
||||
@timeit "GPU to CPU" U0 = Array(U)
|
||||
|
@ -98,6 +98,7 @@ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam,
|
|||
|
||||
return nothing
|
||||
end
|
||||
# Convenience method: same as the `backflow` method that takes an explicit
# integrator, called with the default integrator `wfl_rk3(Float64,0.01,1.0)`.
function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
    default_int = wfl_rk3(Float64,0.01,1.0)
    return backflow(psi, U, Dt, maxnsave, gp, dpar, lp, default_int, ymws, dws)
end
|
||||
|
||||
"""
|
||||
function bflw_step!(U, psi, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
@ -108,8 +109,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
|
|||
|
||||
@timeit "Backflow step" begin
|
||||
|
||||
V = copy(U)
|
||||
V .= U
|
||||
@timeit "GPU to CPU" V = Array(U)
|
||||
|
||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||
|
||||
|
@ -131,7 +131,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
|
|||
|
||||
Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp)
|
||||
|
||||
U .= V
|
||||
@timeit "CPU to GPU" copyto!(U,V)
|
||||
|
||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||
|
||||
|
@ -144,7 +144,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
|
|||
Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp)
|
||||
dws.sAp .= psi + (8/9)*dws.sAp
|
||||
|
||||
U .= V
|
||||
@timeit "CPU to GPU" copyto!(U,V)
|
||||
|
||||
Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp)
|
||||
psi .= (1/4)*psi + dws.sp + dws.sAp
|
||||
|
@ -166,8 +166,9 @@ function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugePar
|
|||
if ns > 10
|
||||
flw(U, psi, int, 9, eps, gp, dpar, lp, ymws, dws)
|
||||
ymws.U1 .= U
|
||||
dws.sr .= psi
|
||||
flw(U, psi, int, 1, eps, gp, dpar, lp, ymws, dws)
|
||||
flw(ymws.U1, int, 2, eps/2, gp, lp, ymws)
|
||||
flw(ymws.U1,dws.sr, int, 2, eps/2, gp, dpar,lp, ymws,dws)
|
||||
|
||||
dt = dt - 10*eps
|
||||
nstp = nstp + 10
|
||||
|
@ -175,8 +176,10 @@ function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugePar
|
|||
|
||||
# adjust step size
|
||||
ymws.U1 .= ymws.U1 ./ U
|
||||
dws.sr .= dws.sr .- psi
|
||||
maxd = CUDA.mapreduce(dev_one, max, ymws.U1, init=zero(tend))
|
||||
eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3))
|
||||
pfdist = sqrt(CUDA.mapreduce(norm2, max, dws.sr, init=zero(tend)))
|
||||
eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3),int.sft_fac*eps*(int.tol/pfdist)^(one(tend)/3))
|
||||
|
||||
else
|
||||
flw(U, psi, int, ns, eps, gp, dpar, lp, ymws, dws)
|
||||
|
@ -205,7 +208,7 @@ flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam,
|
|||
|
||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||
|
||||
Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
|
||||
Computes ``\\nabla^* \\nabla`` `si` and stores it in `so`.
|
||||
|
||||
"""
|
||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
|
||||
|
@ -216,6 +219,7 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
|
|||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
|
||||
SF_bndfix!(si,lp)
|
||||
@timeit "Laplacian" begin
|
||||
|
@ -238,7 +242,7 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
|||
|
||||
so[b,r] = -4*si[b,r]
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
|
@ -313,9 +317,29 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
|
|||
end
|
||||
|
||||
|
||||
|
||||
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
||||
|
||||
"""
|
||||
function bfl_error(psi_t, psi_0, U, tend, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
||||
Estimates the error of the backflow integration of `psi_t` into `psi_0` with a random noise source.
|
||||
"""
|
||||
function bfl_error(psi_t, psi_0, U, tend, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
    # Procedure: draw a random field, contract it with `psi_0`, flow it with
    # `flw_adapt` up to `tend` (initial step `int.eps_ini/2`), contract the
    # flowed field with `psi_t`, and return the absolute difference of the two
    # contractions. `U` is modified by the flow, so it is backed up on the CPU
    # first and copied back afterwards.

    # Use a private buffer for the noise instead of `dws.sr`: the adaptive
    # integrator `flw_adapt(U, psi, ...)` itself writes to `dws.sr` as the
    # buffer for its half-step comparison, so passing `dws.sr` as `psi` would
    # alias the two fields and flow the noise more than intended.
    eta = similar(dws.sr)
    pfrandomize!(eta, lp)
    @timeit "GPU to CPU" V = Array(U)

    R0 = sum(dot.(psi_0, eta))

    # `finally` guarantees that the caller's gauge field is restored even if
    # the flow integration throws.
    R1 = try
        flw_adapt(U, eta, int, tend, int.eps_ini/2, gp, dpar, lp, ymws, dws)
        sum(dot.(psi_t, eta))
    finally
        @timeit "CPU to GPU" copyto!(U, V)
    end

    return abs(R0-R1)
end
|
||||
|
||||
export bfl_error
|
||||
|
||||
"""
|
||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||
|
@ -362,7 +386,6 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
|
|||
return nothing
|
||||
end
|
||||
|
||||
|
||||
function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
@ -393,7 +416,6 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
|
|||
return nothing
|
||||
end
|
||||
|
||||
|
||||
function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
@ -436,8 +458,6 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
|
|||
return nothing
|
||||
end
|
||||
|
||||
|
||||
|
||||
function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
@inbounds begin
|
||||
|
|
|
@ -40,7 +40,7 @@ include("YM/YM.jl")
|
|||
using .YM
|
||||
export ztwist
|
||||
export YMworkspace, GaugeParm, force0_wilson!, field, field_pln, randomize!, zero!, norm2
|
||||
export force_gauge, MD!
|
||||
export force_gauge, force_gauge_flw, MD!
|
||||
export gauge_action, hamiltonian, plaquette, HMC!, OMF4!
|
||||
export Eoft_clover, Eoft_plaq, Qtop
|
||||
export FlowIntr, wfl_euler, zfl_euler, wfl_rk2, zfl_rk2, wfl_rk3, zfl_rk3
|
||||
|
@ -60,7 +60,7 @@ using .Dirac
|
|||
export DiracWorkspace, DiracParam
|
||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
|
||||
export read_prop, save_prop, read_dpar
|
||||
export Nablanabla!, flw, backflow
|
||||
export Nablanabla!, flw, backflow, bfl_error
|
||||
|
||||
include("Solvers/Solvers.jl")
|
||||
using .Solvers
|
||||
|
|
|
@ -165,7 +165,7 @@ include("YMfields.jl")
|
|||
export randomize!, zero!, norm2
|
||||
|
||||
include("YMact.jl")
|
||||
export krnl_plaq!, force_gauge, force_wilson
|
||||
export krnl_plaq!, force_gauge, force_gauge_flw, force_wilson
|
||||
|
||||
include("YMhmc.jl")
|
||||
export gauge_action, hamiltonian, plaquette, HMC!, MD!
|
||||
|
|
|
@ -320,6 +320,22 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
|||
return nothing
|
||||
end
|
||||
|
||||
"""
    bnd_rescale_flw!(frc1, lp::SpaceParm{N,M,BC_OPEN,D})

CUDA kernel. For the site addressed by the current thread/block index pair
`(b,r)`, multiplies by two the entries `frc1[b,id,r]` for `id = 1,...,N-1`
whenever the coordinate of the site in the last direction (`N`) is `1` or
`lp.iL[N]`. All other sites are left untouched. Returns `nothing`.
"""
function bnd_rescale_flw!(frc1, lp::SpaceParm{N,M,BC_OPEN,D}) where {N,M,D}

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        it = point_coord((b,r), lp)[N]

        # The boundary test does not depend on the direction `id`, so it is
        # evaluated once. The extent is read as `lp.iL[N]` (not a hard-coded
        # `lp.iL[4]`) to stay consistent with the generic `N` used for the
        # coordinate; assumes `lp.iL` has `N` entries.
        if (it == 1) || (it == lp.iL[N])
            for id in 1:N-1
                frc1[b,id,r] = 2*frc1[b,id,r]
            end
        end
    end

    return nothing
end
|
||||
|
||||
##
|
||||
## SF
|
||||
|
@ -874,7 +890,6 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
|||
end
|
||||
|
||||
|
||||
|
||||
##
|
||||
## PERIODIC
|
||||
##
|
||||
|
@ -1143,6 +1158,38 @@ end
|
|||
force_gauge(ymws::YMworkspace, U, c0, gp, lp) = force_gauge(ymws, U, c0, gp.cG[1], gp, lp)
|
||||
force_gauge(ymws::YMworkspace, U, gp, lp) = force_gauge(ymws, U, gp.c0, gp.cG[1], gp, lp)
|
||||
|
||||
"""
|
||||
function force_gauge_flw(ymws::YMworkspace, U, c0, cG, gp::GaugeParm, lp::SpaceParm{N,M,BC_OPEN,D})
|
||||
|
||||
Computes the force for the gauge flow with Open Boundaries. An additional factor two in the boundaries
|
||||
is included, see
|
||||
|
||||
M. Luescher, S. Schaefer: "Lattice QCD with open boundary conditions and twisted-mass reweighting", Comput.Phys.Commun. 184 (2013) 519,
|
||||
|
||||
for more details.
|
||||
|
||||
"""
|
||||
function force_gauge_flw(ymws::YMworkspace, U, c0, cG, gp::GaugeParm, lp::SpaceParm{N,M,BC_OPEN,D}) where {N,M,D}
    # The `where` clause lists only the parameters that appear in the
    # signature; an extra, unused static parameter makes Julia emit a
    # "declares type variable ... but does not use it" warning.

    ztw = ztwist(gp, lp)

    # `c0` equal to one (within 1.0E-10) selects the call without the `c0`
    # argument; any other value is forwarded to `force_pln!`.
    if abs(c0-1) < 1.0E-10
        @timeit "Wilson gauge force" begin
            force_pln!(ymws.frc1, ymws.frc2, U, gp.Ubnd, cG, ztw, lp)
        end
    else
        @timeit "Improved gauge force" begin
            force_pln!(ymws.frc1, ymws.frc2, U, gp.Ubnd, cG, ztw, lp, c0)
        end
    end

    # Rescale `ymws.frc1` at the boundary sites; one thread per `b` index
    # (`lp.bsz`) and one block per `r` index (`lp.rsz`).
    CUDA.@sync begin
        CUDA.@cuda threads=lp.bsz blocks=lp.rsz bnd_rescale_flw!(ymws.frc1, lp)
    end

    return nothing
end
|
||||
|
||||
|
||||
"""
|
||||
function force_wilson(ymws::YMworkspace, U, gp::GaugeParm, lp::SpaceParm)
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ function Base.show(io::IO, int::FlowIntr{N,T}) where {N,T}
|
|||
if N == 0
|
||||
println(io, " * Euler schem3")
|
||||
elseif N == 1
|
||||
println(io, " * One stage scheme. Coefficients3")
|
||||
println(io, " * One stage scheme. Coefficients")
|
||||
println(io, " stg 1: ", int.e0[1], " ", int.e1[1])
|
||||
elseif N == 2
|
||||
println(io, " * Two stage scheme. Coefficients:")
|
||||
|
@ -201,6 +201,31 @@ function flw(U, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, lp::SpacePar
|
|||
end
|
||||
flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T} = flw(U, int, ns, int.eps, gp, lp, ymws)
|
||||
|
||||
# Flow integrator for open boundary conditions: performs `ns` steps of size
# `eps`, using `force_gauge_flw` for every force evaluation and updating `U`
# in place through `ymws.mom` / `ymws.frc1`.
function flw(U, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace) where {NI,T,N,M,D}
    @timeit "Integrating flow equations" begin
        for istep in 1:ns
            # Initial stage: plain force, step scaled by `int.r`.
            force_gauge_flw(ymws, U, int.c0, 1, gp, lp)
            int.add_zth && add_zth_term(ymws, U, lp)
            ymws.mom .= ymws.frc1
            U .= expm.(U, ymws.mom, 2*eps*int.r)

            # Remaining `NI` stages: combine the previous momentum and the new
            # force with the coefficients `int.e0[stg]`, `int.e1[stg]`.
            for stg in 1:NI
                force_gauge_flw(ymws, U, int.c0, 1, gp, lp)
                int.add_zth && add_zth_term(ymws, U, lp)
                ymws.mom .= int.e0[stg].*ymws.mom .+ int.e1[stg].*ymws.frc1
                U .= expm.(U, ymws.mom, 2*eps)
            end
        end
    end

    return nothing
end

# Same as above, taking the step size from the integrator (`int.eps`).
function flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace) where {NI,T,N,M,D}
    return flw(U, int, ns, int.eps, gp, lp, ymws)
end
|
||||
|
||||
|
||||
##
|
||||
# Adaptive step size integrators
|
||||
|
@ -320,30 +345,30 @@ Eoft_plaq(U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws::YMworkspace) w
|
|||
|
||||
|
||||
function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
||||
|
||||
|
||||
@inbounds begin
|
||||
b = Int64(CUDA.threadIdx().x)
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
I = point_coord((b,r), lp)
|
||||
|
||||
|
||||
id1, id2 = lp.plidx[ipl]
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI)) && (id1 == N)
|
||||
TWP = ((I[id1]==1)&&(I[id2]==1))
|
||||
|
||||
|
||||
bu1, ru1 = up((b, r), id1, lp)
|
||||
bu2, ru2 = up((b, r), id2, lp)
|
||||
|
||||
if SFBC && (ru1 != r)
|
||||
|
||||
if SFBC && (point_time((b,r),lp) == lp.iL[end])
|
||||
gt = Ubnd[id2]
|
||||
else
|
||||
gt = U[bu1,id2,ru1]
|
||||
end
|
||||
|
||||
|
||||
if TWP
|
||||
plx[I] = ztw*tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
|
||||
else
|
||||
plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
|
||||
end
|
||||
end
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
|
|
@ -92,7 +92,7 @@ end
|
|||
"""
|
||||
function setbndfield(U, phi, lp::SpaceParm)
|
||||
|
||||
Sets abelian boundary fields with phases `phi[1]` and `phi[2]` to the configuration `U` at time salice ``x_0=0``.
|
||||
Sets abelian boundary fields with phases `phi[1]` and `phi[2]` to the configuration `U` at time slice ``x_0=0``.
|
||||
"""
|
||||
function setbndfield(U, phi, lp::SpaceParm{N,M,B,D}) where {N,M,B,D}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue