mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-06-29 21:39:27 +02:00
Merge branch 'master' of igit.ific.uv.es:alramos/latticegpu.jl
This commit is contained in:
commit
3c59b9251a
41 changed files with 3467 additions and 902 deletions
|
@ -19,20 +19,31 @@ using ..Fields
|
|||
using ..YM
|
||||
using ..Spinors
|
||||
|
||||
"""
|
||||
struct DiracParam{T,R}
|
||||
|
||||
Stores the parameters of the Dirac operator. It can be generated via the constructor `function DiracParam{T}(::Type{R},m0,csw,th,tm,ct)`. The first argument can be omitted, in which case it is taken to be `SU3fund`.
|
||||
The parameters are:
|
||||
|
||||
- `m0::T` : Mass of the fermion
|
||||
- `csw::T` : Improvement coefficient for the Csw term
|
||||
- `th::NTuple{4,Complex{T}}` : Phase for the fermions included in the boundary conditions, reabsorbed in the Dirac operator.
|
||||
- `tm` : Twisted mass parameter
|
||||
- `ct` : Boundary improvement term, only used for Schrödinger Functional boundary conditions.
|
||||
"""
|
||||
struct DiracParam{T,R}
|
||||
m0::T
|
||||
csw::T
|
||||
th::NTuple{4,Complex{T}}
|
||||
tm::T
|
||||
ct::T
|
||||
|
||||
|
||||
function DiracParam{T}(::Type{R},m0,csw,th,ct) where {T,R}
|
||||
return new{T,R}(m0,csw,th,ct)
|
||||
function DiracParam{T}(::Type{R},m0,csw,th,tm,ct) where {T,R}
|
||||
return new{T,R}(m0,csw,th,tm,ct)
|
||||
end
|
||||
|
||||
function DiracParam{T}(m0,csw,th,ct) where {T}
|
||||
return new{T,SU3fund}(m0,csw,th,ct)
|
||||
function DiracParam{T}(m0,csw,th,tm,ct) where {T}
|
||||
return new{T,SU3fund}(m0,csw,th,tm,ct)
|
||||
end
|
||||
end
|
||||
function Base.show(io::IO, dpar::DiracParam{T,R}) where {T,R}
|
||||
|
@ -40,11 +51,24 @@ function Base.show(io::IO, dpar::DiracParam{T,R}) where {T,R}
|
|||
println(io, "Wilson fermions in the: ", R, " representation")
|
||||
println(io, " - Bare mass: ", dpar.m0," // Kappa = ",0.5/(dpar.m0+4))
|
||||
println(io, " - Csw : ", dpar.csw)
|
||||
println(io, " - c_t: ", dpar.ct)
|
||||
println(io, " - Theta: ", dpar.th)
|
||||
println(io, " - Twisted mass: ", dpar.tm)
|
||||
println(io, " - c_t: ", dpar.ct)
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
"""
|
||||
struct DiracWorkspace{T}
|
||||
|
||||
Workspace needed to work with fermion fields. It contains four scalar fermion fields and, for the SU2fund and SU3fund, a U(N) field to store the clover term.
|
||||
|
||||
It can be created with the constructor `DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D})`. For example:
|
||||
|
||||
dws = DiracWorkspace(SU2fund,Float64,lp);
|
||||
dws = DiracWorkspace(SU3fund,Float64,lp);
|
||||
|
||||
"""
|
||||
struct DiracWorkspace{T}
|
||||
sr
|
||||
sp
|
||||
|
@ -81,573 +105,30 @@ struct DiracWorkspace{T}
|
|||
|
||||
end
|
||||
|
||||
export DiracWorkspace, DiracParam
|
||||
|
||||
|
||||
"""
|
||||
function Csw!(dws, U, gp, lp::SpaceParm)
|
||||
|
||||
Computes the clover and stores it in dws.csw.
|
||||
function mtwmdpar(dpar::DiracParam)
|
||||
|
||||
Returns a copy of `dpar` with the opposite sign of the twisted mass.
|
||||
"""
|
||||
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
@timeit "Csw computation" begin
|
||||
|
||||
for i in 1:Int(lp.npls)
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
|
||||
|
||||
@inbounds begin
|
||||
b = Int64(CUDA.threadIdx().x)
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
I = point_coord((b,r), lp)
|
||||
it = I[4]
|
||||
|
||||
id1, id2 = lp.plidx[ipl]
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
||||
|
||||
bu1, ru1 = up((b, r), id1, lp)
|
||||
bu2, ru2 = up((b, r), id2, lp)
|
||||
bd1, rd1 = dw((b, r), id1, lp)
|
||||
bd2, rd2 = dw((b, r), id2, lp)
|
||||
bdd, rdd = dw((bd1, rd1), id2, lp)
|
||||
bud, rud = dw((bu1, ru1), id2, lp)
|
||||
bdu, rdu = up((bd1, rd1), id2, lp)
|
||||
|
||||
if SFBC && (it == lp.iL[end])
|
||||
gt1 = Ubnd[id2]
|
||||
gt2 = Ubnd[id2]
|
||||
else
|
||||
gt1 = U[bu1,id2,ru1]
|
||||
gt2 = U[bud,id2,rud]
|
||||
end
|
||||
|
||||
M1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
|
||||
M2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
|
||||
M3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
|
||||
M4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]
|
||||
|
||||
|
||||
if !(SFBC && (it == 1))
|
||||
csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
if abs(dpar.csw) > 1.0E-10
|
||||
@timeit "Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
|
||||
end
|
||||
end
|
||||
else
|
||||
@timeit "Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
|
||||
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = (4+m0)*si[b,r]
|
||||
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
if abs(dpar.csw) > 1.0E-10
|
||||
@timeit "Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
|
||||
end
|
||||
end
|
||||
else
|
||||
@timeit "Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
# The field si is assumed to be zero at t = 0
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) != 1)
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
|
||||
|
||||
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
|
||||
so[b,r] += (ct-1.0)*si[b,r]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
# The field si is assumed to be zero at t = 0
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) != 1)
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = (4+m0)*si[b,r]
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
|
||||
so[b,r] += (ct-1.0)*si[b,r]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
if abs(dpar.csw) > 1.0E-10
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
|
||||
end
|
||||
end
|
||||
else
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
|
||||
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = (4+m0)*si[b,r]
|
||||
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
if abs(dpar.csw) > 1.0E-10
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
|
||||
end
|
||||
end
|
||||
else
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
# The field si is assumed to be zero at t = 0
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) != 1)
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
|
||||
|
||||
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
|
||||
so[b,r] += (ct-1.0)*si[b,r]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
# The field si is assumed to be zero at t = 0
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) != 1)
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = (4+m0)*si[b,r]
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
|
||||
so[b,r] += (ct-1.0)*si[b,r]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
if abs(dpar.csw) > 1.0E-10
|
||||
@timeit "DwdagDw" begin
|
||||
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
|
||||
end
|
||||
end
|
||||
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
else
|
||||
@timeit "DwdagDw" begin
|
||||
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, dpar.ct, lp)
|
||||
end
|
||||
end
|
||||
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, dpar.ct, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
if abs(dpar.csw) > 1.0E-10
|
||||
@timeit "DwdagDw" begin
|
||||
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
|
||||
end
|
||||
end
|
||||
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
else
|
||||
@timeit "DwdagDw" begin
|
||||
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, lp)
|
||||
end
|
||||
end
|
||||
|
||||
@timeit "g5Dw" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
function mtwmdpar(dpar::DiracParam{P,R}) where {P,R}
|
||||
return DiracParam{P}(R,dpar.m0,dpar.csw,dpar.th,-dpar.tm,dpar.ct)
|
||||
end
|
||||
|
||||
|
||||
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
export DiracWorkspace, DiracParam, mtwmdpar
|
||||
|
||||
function krnl_sfbndfix!(sp,lp::SpaceParm)
|
||||
b=Int64(CUDA.threadIdx().x)
|
||||
r=Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) == 1)
|
||||
sp[b,r] = 0.0*sp[b,r]
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
"""
|
||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0)
|
||||
|
||||
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
||||
"""
|
||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T}
|
||||
|
||||
@timeit "Randomize pseudofermion field" begin
|
||||
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
|
||||
|
||||
@inbounds begin
|
||||
b = Int64(CUDA.threadIdx().x)
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if t == 0
|
||||
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
|
||||
x[b,2,r,1] + im* x[b,2,r,2],
|
||||
x[b,3,r,1] + im* x[b,3,r,2]),p))
|
||||
elseif point_time((b,r),lp) == t
|
||||
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
|
||||
x[b,2,r,1] + im* x[b,2,r,2],
|
||||
x[b,3,r,1] + im* x[b,3,r,2]),p))
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}},lp::SpaceParm, t::Int64=0) where {T}
|
||||
|
||||
@timeit "Randomize pseudofermion field" begin
|
||||
p = ntuple(i->CUDA.randn(T, lp.bsz, 2, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
|
||||
|
||||
@inbounds begin
|
||||
b = Int64(CUDA.threadIdx().x)
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if t == 0
|
||||
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
|
||||
x[b,2,r,1] + im* x[b,2,r,2]),p))
|
||||
elseif point_time((b,r),lp) == t
|
||||
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
|
||||
x[b,2,r,1] + im* x[b,2,r,2]),p))
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!
|
||||
include("Diracfields.jl")
|
||||
export SF_bndfix!, Csw!, pfrandomize!
|
||||
|
||||
include("Diracoper.jl")
|
||||
export Dw!, g5Dw!, DwdagDw!
|
||||
|
||||
include("DiracIO.jl")
|
||||
export read_prop, save_prop, read_dpar
|
||||
|
||||
include("Diracflow.jl")
|
||||
export Nablanabla!, Dslash_sq!, flw, backflow
|
||||
|
||||
|
||||
end
|
||||
|
|
|
@ -41,7 +41,7 @@ function read_prop(fname::String)
|
|||
footh = Vector{Float64}(undef, 4)
|
||||
|
||||
lp = SpaceParm{ndim}(iL, (4,4,4,4), ibc, ntw)
|
||||
dpar = DiracParam{Float64}(SU3fund,foopars[1],foopars[2],ntuple(i -> footh[i], 4),foopars[3])
|
||||
dpar = DiracParam{Float64}(SU3fund,foopars[1],foopars[2],ntuple(i -> footh[i], 4),foopars[3],foopars[4])
|
||||
|
||||
|
||||
dtr = (2,3,4,1)
|
||||
|
@ -100,7 +100,7 @@ function save_prop(fname::String, psi, lp::SpaceParm{4,M,B,D}, dpar::DiracParam;
|
|||
BDIO_write!(fb, [convert(Int32, B)])
|
||||
BDIO_write!(fb, [convert(Int32, lp.iL[i]) for i in 1:4])
|
||||
BDIO_write!(fb, [convert(Int32, lp.ntw[i]) for i in 1:M])
|
||||
BDIO_write!(fb, [dpar.m0, dpar.csw, dpar.ct])
|
||||
BDIO_write!(fb, [dpar.m0, dpar.csw, dpar.tm, dpar.ct])
|
||||
BDIO_write!(fb, [dpar.th[i] for i in 1:4])
|
||||
end
|
||||
BDIO_write_hash!(fb)
|
||||
|
@ -175,9 +175,9 @@ function read_dpar(fname::String)
|
|||
footh = Vector{Float64}(undef, 4)
|
||||
|
||||
lp = SpaceParm{ndim}(iL, (4,4,4,4), ibc, ntw)
|
||||
dpar = DiracParam{Float64}(SU3fund,foopars[1],foopars[2],ntuple(i -> footh[i], 4),foopars[3])
|
||||
dpar = DiracParam{Float64}(SU3fund,foopars[1],foopars[2],ntuple(i -> footh[i], 4),foopars[3],foopars[4])
|
||||
|
||||
|
||||
BDIO_close!(fb)
|
||||
return dpar, lp
|
||||
end
|
||||
end
|
||||
|
|
211
src/Dirac/Diracfields.jl
Normal file
211
src/Dirac/Diracfields.jl
Normal file
|
@ -0,0 +1,211 @@
|
|||
|
||||
|
||||
|
||||
"""
    Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D})

Compute the clover term from the gauge field `U` and store it in `dws.csw`.

One `krnl_csw!` launch (`lp.bsz` threads, `lp.rsz` blocks) is issued and
synchronized for every plane index in `1:lp.npls`; `gp.Ubnd` is forwarded to
the kernel. Returns `nothing`.
"""
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}

    nplanes = Int(lp.npls)

    @timeit "Csw computation" begin
        for ipl in 1:nplanes
            # Each plane is processed by its own, fully synchronized launch.
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, ipl, lp)
        end
    end

    return nothing
end
|
||||
|
||||
# GPU kernel behind `Csw!`: thread `b` of block `r` handles one lattice site and
# writes `csw[b,ipl,r]` for plane `ipl`, whose two directions are `lp.plidx[ipl]`.
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        site = (b, r)
        it = point_coord(site, lp)[4]   # time coordinate of this site

        id1, id2 = lp.plidx[ipl]

        # Boundary flags. SFBC only applies to planes whose first direction is 4.
        SFBC = (id1 == 4) && ((B == BC_SF_AFWB) || (B == BC_SF_ORBI))
        OBC  = (B == BC_OPEN) && ((it == 1) || (it == lp.iL[end]))

        # Neighbouring sites in the (id1, id2) plane.
        bu1, ru1 = up(site, id1, lp)
        bu2, ru2 = up(site, id2, lp)
        bd1, rd1 = dw(site, id1, lp)
        bd2, rd2 = dw(site, id2, lp)
        bdd, rdd = dw((bd1, rd1), id2, lp)
        bud, rud = dw((bu1, ru1), id2, lp)
        bdu, rdu = up((bd1, rd1), id2, lp)

        # On the last time slice with SF boundaries the links at x + id1 are
        # replaced by the fixed boundary value Ubnd[id2].
        lastslice = SFBC && (it == lp.iL[end])
        gt1 = lastslice ? Ubnd[id2] : U[bu1,id2,ru1]
        gt2 = lastslice ? Ubnd[id2] : U[bud,id2,rud]

        # Four link products around the site in the (id1, id2) plane
        # (presumably the four clover leaves -- TODO confirm orientation convention).
        leaf1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
        leaf2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
        leaf3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
        leaf4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]

        # Nothing is written on the first SF time slice nor on open-boundary slices.
        skip = OBC || (SFBC && (it == 1))
        if !skip
            csw[b,ipl,r] = 0.125*(antsym(leaf1)+antsym(leaf2)+antsym(leaf3)+antsym(leaf4))
        end
    end

    return nothing
end
|
||||
|
||||
|
||||
|
||||
"""
    SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

Set all the values of `sp` in the first time slice to zero.

Launches `krnl_sfbndfix!` with `lp.bsz` threads and `lp.rsz` blocks and waits
for it to finish. Returns `nothing`.
"""
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    @timeit "SF boundary fix" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
    end

    return nothing
end
|
||||
|
||||
# GPU kernel: scales by 0.0 the entry of `sp` owned by this thread when the site
# lies on the first time slice (`point_time == 1`); other sites are untouched.
function krnl_sfbndfix!(sp,lp::SpaceParm)
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Guard clause: only the t = 1 slice is modified.
    point_time((b,r),lp) == 1 || return nothing

    # NOTE(review): 0.0*x keeps NaN/Inf entries non-zero and uses a Float64 literal.
    sp[b,r] = 0.0*sp[b,r]
    return nothing
end
|
||||
|
||||
"""
    SF_bndfix!(sp, lp::SpaceParm{4,6,BC_OPEN,D})

Set all the values of `sp` in the first and last time slice to zero.

Launches `krnl_opbndfix!` with `lp.bsz` threads and `lp.rsz` blocks and waits
for it to finish. Returns `nothing`.
"""
function SF_bndfix!(sp, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    @timeit "SF boundary fix" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_opbndfix!(sp, lp)
    end

    return nothing
end
|
||||
|
||||
# GPU kernel: scales by 0.0 the entry of `sp` owned by this thread when the site
# lies on the first (`t == 1`) or last (`t == lp.iL[end]`) time slice.
function krnl_opbndfix!(sp,lp::SpaceParm)
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)
    onboundary = (it == 1) || (it == lp.iL[end])

    if onboundary
        sp[b,r] = 0.0*sp[b,r]
    end
    return nothing
end
|
||||
|
||||
|
||||
"""
    pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0)
    pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::SpaceParm, t::Int64 = 0)

Randomize the SU2fund / SU3fund fermion field `f` with Gaussian noise.

Real and imaginary parts are drawn independently with `CUDA.randn` and divided
by `sqrt(2)`, so each has variance 1/2. With the default `t = 0` every site is
overwritten; otherwise only the sites with `point_time == t` are. Returns
`nothing`.
"""
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}

    @timeit "Randomize pseudofermion field" begin
        # Normalization in precision T: dividing by the Float64 `sqrt(2)` would
        # promote the random arrays to Float64 when T == Float32.
        nrm = sqrt(T(2))

        # One (bsz, 3, rsz, 2) real array per Dirac component; the last index
        # holds real/imaginary parts (complex generation not supported for Julia 1.5.4).
        p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4)./nrm
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
        end
    end

    return nothing
end
|
||||
|
||||
# SU3fund randomization for Schrödinger Functional and open boundary conditions:
# same Gaussian fill as the periodic method, followed by `SF_bndfix!(f, lp)` so
# that the boundary time slice(s) are reset after the fill.
# `t = 0` overwrites every site; otherwise only sites with `point_time == t`.
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}

    @timeit "Randomize pseudofermion field" begin
        # Normalization in precision T: dividing by the Float64 `sqrt(2)` would
        # promote the random arrays to Float64 when T == Float32.
        nrm = sqrt(T(2))

        # One (bsz, 3, rsz, 2) real array per Dirac component; the last index
        # holds real/imaginary parts (complex generation not supported for Julia 1.5.4).
        p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4)./nrm
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
        end
    end
    SF_bndfix!(f,lp)

    return nothing
end
|
||||
|
||||
# GPU kernel: builds the SU3fund spinor of the site owned by this thread from the
# tuple `p` of real arrays (index 2 = colour, index 4 = real/imaginary part).
# The site is written when `t == 0` or when its time coordinate equals `t`.
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        # Short-circuit keeps `point_time` from being evaluated when t == 0.
        if (t == 0) || (point_time((b,r),lp) == t)
            f[b,r] = Spinor(map(rnd->SU3fund(rnd[b,1,r,1] + im* rnd[b,1,r,2],
                                             rnd[b,2,r,1] + im* rnd[b,2,r,2],
                                             rnd[b,3,r,1] + im* rnd[b,3,r,2]),p))
        end
    end

    return nothing
end
|
||||
|
||||
# SU2fund randomization for periodic boundary conditions: fills `f` with Gaussian
# noise whose real and imaginary parts each have variance 1/2.
# `t = 0` overwrites every site; otherwise only sites with `point_time == t`.
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}

    @timeit "Randomize pseudofermion field" begin
        # Normalization in precision T: dividing by the Float64 `sqrt(2)` would
        # promote the random arrays to Float64 when T == Float32.
        nrm = sqrt(T(2))

        # Only two colour components are needed: `krnl_assign_pf_su2!` reads
        # indices 1 and 2 of the second dimension (complex generation not
        # supported for Julia 1.5.4, hence the trailing real/imaginary index).
        p = ntuple(i->CUDA.randn(T, lp.bsz, 2, lp.rsz,2),4)./nrm
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
        end
    end

    return nothing
end
|
||||
|
||||
# SU2fund randomization for Schrödinger Functional and open boundary conditions:
# same Gaussian fill as the periodic method, followed by `SF_bndfix!(f, lp)` so
# that the boundary time slice(s) are reset after the fill.
# `t = 0` overwrites every site; otherwise only sites with `point_time == t`.
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}

    @timeit "Randomize pseudofermion field" begin
        # Normalization in precision T: dividing by the Float64 `sqrt(2)` would
        # promote the random arrays to Float64 when T == Float32.
        nrm = sqrt(T(2))

        # Only two colour components are needed: `krnl_assign_pf_su2!` reads
        # indices 1 and 2 of the second dimension (complex generation not
        # supported for Julia 1.5.4, hence the trailing real/imaginary index).
        p = ntuple(i->CUDA.randn(T, lp.bsz, 2, lp.rsz,2),4)./nrm
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
        end
    end
    SF_bndfix!(f,lp)

    return nothing
end
|
||||
|
||||
# GPU kernel: builds the SU2fund spinor of the site owned by this thread from the
# tuple `p` of real arrays (index 2 = colour, index 4 = real/imaginary part).
# The site is written when `t == 0` or when its time coordinate equals `t`.
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        # Short-circuit keeps `point_time` from being evaluated when t == 0.
        if (t == 0) || (point_time((b,r),lp) == t)
            f[b,r] = Spinor(map(rnd->SU2fund(rnd[b,1,r,1] + im* rnd[b,1,r,2],
                                             rnd[b,2,r,1] + im* rnd[b,2,r,2]),p))
        end
    end

    return nothing
end
|
456
src/Dirac/Diracflow.jl
Normal file
456
src/Dirac/Diracflow.jl
Normal file
|
@ -0,0 +1,456 @@
|
|||
|
||||
import ..YM.flw, ..YM.force_gauge, ..YM.flw_adapt
|
||||
|
||||
|
||||
"""
    flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)

Performs `ns` steps of size `eps` of the integrator `int`, updating the gauge
field `U` and the fermion field `psi` together, both in place. Each stage
evaluates the gauge force with `force_gauge` (plus `add_zth_term` when
`int.add_zth` is set) and the fermion term with `Nablanabla!`, both on the
gauge field as it is before that stage's update. `ymws.mom`, `dws.sAp` and
`dws.sp` are used as workspace.
"""
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
    @timeit "Integrating flow equations" begin
        for _ in 1:ns
            # First stage.
            force_gauge(ymws, U, int.c0, 1, gp, lp)
            if int.add_zth
                add_zth_term(ymws, U, lp)
            end

            Nablanabla!(dws.sAp, U, psi, dpar, dws, lp)
            # Fully dotted so the update is a single fused pass without
            # temporary fields.
            psi .= psi .+ (2*int.r*eps) .* dws.sAp

            ymws.mom .= ymws.frc1
            U .= expm.(U, ymws.mom, 2*eps*int.r)

            # Remaining NI stages: force and fermion term are accumulated with
            # the integrator coefficients e0[k], e1[k].
            for k in 1:NI
                force_gauge(ymws, U, int.c0, 1, gp, lp)
                if int.add_zth
                    add_zth_term(ymws, U, lp)
                end

                Nablanabla!(dws.sp, U, psi, dpar, dws, lp)
                dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
                psi .= psi .+ (2*eps) .* dws.sAp

                ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
                U .= expm.(U, ymws.mom, 2*eps)
            end
        end
    end

    return nothing
end
|
||||
# Convenience method: same as the method taking `eps`, with the step size
# stored in the integrator (`int.eps`).
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
    return flw(U, psi, int, ns, int.eps, gp, dpar, lp, ymws, dws)
end
|
||||
|
||||
"""
|
||||
    backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)

Integrates the fermion field `psi` backwards in flow time, according to arXiv:1302.5246. On input `psi` must be the fermion field at flow time `Dt`; it is flowed back to the initial flow time. The gauge field `U` is restored to its input value before returning.

`maxnsave` is the maximum number of intermediate gauge fields saved in the process.
|
||||
|
||||
"""
|
||||
function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)

    # Default integrator; it has to be an order 3 RK, but it can be zfl.
    # NOTE(review): the precision is fixed to Float64 here, so `Dt` presumably
    # has to be a Float64 for the `flw_adapt` call below to dispatch - confirm.
    int = wfl_rk3(Float64,0.01,1.0)

    @timeit "Backflow integration" begin
        @timeit "GPU to CPU" U0 = Array(U)

        # Forward (gauge only) adaptive flow up to Dt: number of steps taken and
        # the step size of each one.
        nt, eps_all = flw_adapt(U, int, Dt, gp, lp, ymws)

        # Checkpointing layout: nsave gauge fields are kept on the host, one
        # every dsave steps, and nrem steps remain after the last checkpoint.
        # With nsave == 0 nothing is stored and every step is rebuilt from U0.
        # The previous code evaluated nt % nsave in that case (DivideError) and
        # allowed a negative nsave.
        nsave = max(0, min(maxnsave, nt))
        dsave = nsave != 0 ? nt ÷ nsave : nt
        nrem  = nsave != 0 ? nt - nsave*dsave : 0
        Usave = Vector{typeof(U0)}(undef, nsave)

        # Forward pass storing the checkpoints.
        @timeit "CPU to GPU" copyto!(U,U0)
        for i in 1:(dsave*nsave)
            flw(U, int, 1, eps_all[i], gp, lp, ymws)
            if i % dsave == 0
                Usave[i ÷ dsave] = Array(U)
            end
        end

        # Backward steps after the last checkpoint. For step number
        # nsave*dsave + j the gauge field is rebuilt by flowing j-1 steps from
        # that checkpoint.
        for j in nrem:-1:1
            @timeit "CPU to GPU" copyto!(U,Usave[end])
            for k in 1:j-1
                flw(U, int, 1, eps_all[nsave*dsave + k], gp, lp, ymws)
            end
            bflw_step!(psi, U, eps_all[nsave*dsave + j], int, gp, dpar, lp, ymws, dws)
        end

        # Backward steps between consecutive checkpoints.
        for i in (nsave-1):-1:1
            for j in dsave:-1:1
                @timeit "CPU to GPU" copyto!(U,Usave[i])
                for k in 1:j-1
                    flw(U, int, 1, eps_all[i*dsave + k], gp, lp, ymws)
                end
                bflw_step!(psi, U, eps_all[i*dsave + j], int, gp, dpar, lp, ymws, dws)
            end
        end

        # Backward steps before the first checkpoint, rebuilt from U0.
        for j in dsave:-1:1
            @timeit "CPU to GPU" copyto!(U,U0)
            for k in 1:j-1
                flw(U, int, 1, eps_all[k], gp, lp, ymws)
            end
            bflw_step!(psi, U, eps_all[j], int, gp, dpar, lp, ymws, dws)
        end

        # Leave the gauge field as it was on input.
        @timeit "CPU to GPU" copyto!(U,U0)
    end

    return nothing
end
|
||||
|
||||
"""
|
||||
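    bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)

Performs ONE backstep in `psi`, from flow time `t` to `t - eps`. `U` is supposed to be the gauge field at `t - eps`; it is modified during the step and restored to its input value before returning. So far, `int` has to be a third-order Runge-Kutta integrator (such as the `wfl_rk3` one used by `backflow`).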
|
||||
"""
|
||||
function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)

    @timeit "Backflow step" begin

        # Keep the incoming gauge field: U is advanced below to build the
        # intermediate fields of the forward step and reset from this copy.
        # (`copy` already holds the data; the former extra `V .= U` was a
        # redundant second full-field copy.)
        V = copy(U)

        # Advance U through the first two stages of the forward gauge step.
        force_gauge(ymws, U, int.c0, 1, gp, lp)
        if int.add_zth
            add_zth_term(ymws, U, lp)
        end
        ymws.mom .= ymws.frc1
        U .= expm.(U, ymws.mom, 2*eps*int.r)

        force_gauge(ymws, U, int.c0, 1, gp, lp)
        if int.add_zth
            add_zth_term(ymws, U, lp)
        end
        ymws.mom .= int.e0[1].*ymws.mom .+ int.e1[1].*ymws.frc1
        U .= expm.(U, ymws.mom, 2*eps)

        # dws.sp <- Nablanabla applied to (0.75*2*eps*psi) on the two-stage field.
        Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp)

        # Back to the incoming field, then the first stage only.
        U .= V
        force_gauge(ymws, U, int.c0, 1, gp, lp)
        if int.add_zth
            add_zth_term(ymws, U, lp)
        end
        U .= expm.(U, ymws.frc1, 2*eps*int.r)

        # dws.sAp <- psi + (8/9) * Nablanabla applied to (2*eps*dws.sp).
        Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp)
        dws.sAp .= psi .+ (8/9) .* dws.sAp

        # Last contribution, evaluated on the incoming field; U is thereby left
        # equal to its input value.
        U .= V
        Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp)
        psi .= (1/4) .* psi .+ dws.sp .+ dws.sAp

    end

    return nothing
end
|
||||
|
||||
|
||||
"""
    flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)

Flows the gauge field `U` and the fermion field `psi` (both in place) over a
flow time `tend` with an adaptive step size that starts at `epsini`. The step
size is revised every 10 steps by comparing the last step with two gauge-only
steps of half the size, using `ymws.U1` as scratch space.

Returns `(nstp, eps_all)`: the number of steps performed and the vector with the
size of each step, in order.
"""
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}

    eps     = epsini
    dt      = tend        # flow time still to be covered
    nstp    = 0
    eps_all = T[]

    while true
        ns = floor(Int64, dt/eps)
        if ns > 10
            # 10 steps of size eps; the last one is duplicated on ymws.U1 with
            # two gauge-only steps of size eps/2.
            flw(U, psi, int, 9, eps, gp, dpar, lp, ymws, dws)
            ymws.U1 .= U
            flw(U, psi, int, 1, eps, gp, dpar, lp, ymws, dws)
            flw(ymws.U1, int, 2, eps/2, gp, lp, ymws)

            dt   = dt - 10*eps
            nstp = nstp + 10
            append!(eps_all, fill(eps, 10))

            # adjust step size
            ymws.U1 .= ymws.U1 ./ U
            maxd = CUDA.mapreduce(dev_one, max, ymws.U1, init=zero(tend))
            eps  = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3))
        else
            # Final stretch: ns steps of size eps plus one step with whatever
            # flow time is left.
            flw(U, psi, int, ns, eps, gp, dpar, lp, ymws, dws)
            dt = dt - ns*eps

            append!(eps_all, fill(eps, ns))
            push!(eps_all, dt)

            flw(U, psi, int, 1, dt, gp, dpar, lp, ymws, dws)
            dt = zero(tend)

            nstp = nstp + ns + 1
        end

        if dt == zero(tend)
            break
        end
    end

    return nstp, eps_all
end
|
||||
# Convenience method: starts the adaptive integration from the initial step
# size stored in the integrator (`int.eps_ini`).
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
    return flw_adapt(U, psi, int, tend, int.eps_ini, gp, dpar, lp, ymws, dws)
end
|
||||
|
||||
|
||||
"""
|
||||
|
||||
    Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})

Computes ``\\nabla^* \\nabla`` `si` and stores it in `so`.
|
||||
|
||||
"""
|
||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    # Periodic lattice: one kernel launch, one thread per site. Only the phases
    # `dpar.th` are read from `dpar`; `dws` is not used by this method.
    @timeit "Laplacian" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
    end

    return nothing
end
|
||||
"""
    Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp)

Method for `BC_SF_ORBI`, `BC_SF_AFWB` and `BC_OPEN` lattices. `SF_bndfix!` is
applied to the input `si` (which is therefore modified in place) before the
kernel runs, and to the output `so` afterwards.
"""
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}

    SF_bndfix!(si, lp)

    @timeit "Laplacian" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
    end

    SF_bndfix!(so, lp)

    return nothing
end
|
||||
|
||||
|
||||
"""
    krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D})

CUDA kernel used by `Nablanabla!` with open boundary conditions. For every site
whose time coordinate is neither 1 nor `lp.iL[end]` it stores in `so` the value
`-4*si` plus one half of the sum over the four directions of the forward and
backward neighbours of `si`, transported with `U` and multiplied by the phases
`th`. Sites on the first and last time slices are not written.
"""
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin
        it = point_time((b,r),lp)

        if (it != 1) && (it != lp.iL[end])

            so[b,r] = -4*si[b,r]

            # Forward (up) and backward (dw) neighbours in each direction.
            bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
            bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
            bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
            bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

            so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) + conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
                             th[2] * (U[b,2,r]*si[bu2,ru2]) + conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
                             th[3] * (U[b,3,r]*si[bu3,ru3]) + conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
                             th[4] * (U[b,4,r]*si[bu4,ru4]) + conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
        end
    end

    return nothing
end
|
||||
|
||||
"""
    krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D})

CUDA kernel used by `Nablanabla!` with periodic boundary conditions. For every
site it stores in `so` the value `-4*si` plus one half of the sum over the four
directions of the forward and backward neighbours of `si`, transported with `U`
and multiplied by the phases `th`.
"""
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        so[b,r] = -4*si[b,r]

        # Forward (up) and backward (dw) neighbours in each direction.
        bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
        bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
        bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
        bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

        so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) + conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
                         th[2] * (U[b,2,r]*si[bu2,ru2]) + conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
                         th[3] * (U[b,3,r]*si[bu3,ru3]) + conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
                         th[4] * (U[b,4,r]*si[bu4,ru4]) + conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
    end

    return nothing
end
|
||||
|
||||
"""
    krnl_Nablanabla(so, U, si, th, lp)

CUDA kernel used by `Nablanabla!` with `BC_SF_ORBI` / `BC_SF_AFWB` boundary
conditions. Same expression as the periodic kernel, evaluated only on sites
whose time coordinate is different from 1; the first time slice of `so` is not
written.
"""
function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin
        if point_time((b,r),lp) != 1

            so[b,r] = -4*si[b,r]

            # Forward (up) and backward (dw) neighbours in each direction.
            bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
            bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
            bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
            bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

            so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) + conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
                             th[2] * (U[b,2,r]*si[bu2,ru2]) + conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
                             th[3] * (U[b,3,r]*si[bu3,ru3]) + conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
                             th[4] * (U[b,4,r]*si[bu4,ru4]) + conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
        end
    end

    return nothing
end
|
||||
|
||||
|
||||
|
||||
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
||||
|
||||
|
||||
"""
|
||||
    Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})

Computes ``\\slashed{D}^2`` `si` and stores it in `so`.
|
||||
|
||||
"""
|
||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}

    # The clover kernels are only launched when csw is treated as non-zero.
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin

        # First application: si -> dws.st
        @timeit "g5Dslsh" begin
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
        end
        if improved
            @timeit "Dw_improvement" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
            end
        end

        # Second application: dws.st -> so
        @timeit "g5Dslsh" begin
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
        end
        if improved
            @timeit "Dw_improvement" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
            end
        end

    end

    return nothing
end
|
||||
|
||||
|
||||
"""
    krnl_g5Dslsh!(so, U, si, th, lp)

CUDA kernel for `BC_SF_ORBI` / `BC_SF_AFWB` lattices. On every site with time
coordinate different from 1 it stores in `so` the result of `dmul(Gamma{5}, .)`
applied to `4*si` minus one half of the hopping terms built with `gpmul` /
`gdagpmul`. No mass term is included. The first time slice is not written.
"""
function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Nothing to do on the first time slice.
    if point_time((b,r),lp) == 1
        return nothing
    end

    @inbounds begin

        so[b,r] = 4*si[b,r]

        # Forward (up) and backward (dw) neighbours in each direction.
        bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
        bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
        bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
        bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

        so[b,r] = dmul(Gamma{5}, so[b,r])
    end

    return nothing
end
|
||||
|
||||
|
||||
"""
    krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D})

Generic CUDA kernel (any boundary condition without a more specific method). On
every site it stores in `so` the result of `dmul(Gamma{5}, .)` applied to `4*si`
minus one half of the hopping terms built with `gpmul` / `gdagpmul`. No mass
term is included.
"""
function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        so[b,r] = 4*si[b,r]

        # Forward (up) and backward (dw) neighbours in each direction.
        bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
        bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
        bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
        bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

        so[b,r] = dmul(Gamma{5}, so[b,r])
    end

    return nothing
end
|
||||
|
||||
"""
    krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D})

CUDA kernel adding to `so`, site by site, the clover contribution built from the
six components of `Fcsw` acting on `si`, multiplied by `0.5*csw*im` and by
`Gamma{5}`.
"""
function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # NOTE(review): components 4-6 enter with a minus sign here, whereas the
    # krnl_Dwimpr! / krnl_g5Dwimpr! kernels add all six - confirm it is intended.
    @inbounds begin
        so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) -
                                              Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) ))
    end

    return nothing
end
|
||||
|
||||
|
||||
|
||||
"""
    krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp)

Method for `BC_SF_ORBI` / `BC_SF_AFWB` lattices: same clover contribution as the
generic kernel, added only on sites whose time coordinate is different from 1.
"""
function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # NOTE(review): components 4-6 enter with a minus sign here, whereas the
    # krnl_Dwimpr! / krnl_g5Dwimpr! kernels add all six - confirm it is intended.
    @inbounds begin
        if point_time((b,r),lp) != 1
            so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) -
                                                  Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) ))
        end
    end

    return nothing
end
|
667
src/Dirac/Diracoper.jl
Normal file
667
src/Dirac/Diracoper.jl
Normal file
|
@ -0,0 +1,667 @@
|
|||
|
||||
|
||||
|
||||
|
||||
## OPEN
|
||||
|
||||
"""
|
||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||
|
||||
Computes the Dirac operator (with the Wilson term) ``D_w`` with gauge field `U` and parameters `dpar` of the field `si` and stores it in `so`.
|
||||
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
|
||||
|
||||
"""
|
||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # Open boundary conditions: the boundary values are imposed on the input
    # (so `si` is modified in place) and again on the output.
    SF_bndfix!(si,lp)

    @timeit "Dw" begin
        if abs(dpar.csw) > 1.0E-10
            # csw treated as non-zero: kernel including the clover term.
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    SF_bndfix!(so,lp)

    return nothing
end
|
||||
|
||||
"""
    krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D})

CUDA kernel behind `Dw!` for open boundary conditions with the clover term. On
sites away from the first and last time slices it writes to `so` the mass term
`(4+m0)*si`, the twisted-mass term `im*tm*Gamma{5}`, the clover term built from
`Fcsw` and minus one half of the hopping terms. On the time slices 2 and
`lp.iL[4]-1` it also adds `(ct-1)*si`. The boundary time slices are not written.
"""
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)
    if (it == 1) || (it == lp.iL[end])
        return nothing
    end

    # Forward (up) and backward (dw) neighbours in each direction.
    bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
    bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
    bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
    bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

    @inbounds begin

        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                                              Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) )

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

        # Boundary improvement on the time slices next to the boundaries.
        if (it == 2) || (it == (lp.iL[4]-1))
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
||||
|
||||
"""
    krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D})

CUDA kernel behind `Dw!` for open boundary conditions without the clover term.
On sites away from the first and last time slices it writes to `so` the mass
term `(4+m0)*si`, the twisted-mass term `im*tm*Gamma{5}` and minus one half of
the hopping terms. On the time slices 2 and `lp.iL[4]-1` it also adds
`(ct-1)*si`. The boundary time slices are not written.
"""
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)
    if (it == 1) || (it == lp.iL[end])
        return nothing
    end

    # Forward (up) and backward (dw) neighbours in each direction.
    bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
    bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
    bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
    bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

    @inbounds begin

        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

        # Boundary improvement on the time slices next to the boundaries.
        if (it == 2) || (it == (lp.iL[4]-1))
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
||||
|
||||
|
||||
"""
|
||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||
|
||||
Computes ``\\gamma_5`` times the Dirac operator (with the Wilson term) with gauge field `U` and parameters `dpar` of the field `si` and stores it in `so`.
|
||||
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
|
||||
"""
|
||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # Open boundary conditions: the boundary values are imposed on the input
    # (so `si` is modified in place) and again on the output.
    SF_bndfix!(si,lp)

    @timeit "g5Dw" begin
        if abs(dpar.csw) > 1.0E-10
            # csw treated as non-zero: kernel including the clover term.
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    SF_bndfix!(so,lp)

    return nothing
end
|
||||
|
||||
"""
    krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D})

CUDA kernel behind `g5Dw!` for open boundary conditions with the clover term.
On sites away from the first and last time slices it builds in `so` the mass,
clover and hopping terms, plus `(ct-1)*si` on the time slices 2 and
`lp.iL[4]-1`. Then, on every site, `so` is replaced by
`dmul(Gamma{5}, so) + im*tm*si`.
"""
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)
    if !((it == 1) || (it == lp.iL[end]))

        # Forward (up) and backward (dw) neighbours in each direction.
        bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
        bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
        bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
        bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

        @inbounds begin

            so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                    Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) )

            so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                             th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                             th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                             th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

            # Boundary improvement on the time slices next to the boundaries.
            if (it == 2) || (it == (lp.iL[4]-1))
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # NOTE(review): this line also runs on the boundary time slices, where
    # so[b,r] was not assigned above, and it sits outside @inbounds. The host
    # wrappers visible in this file call SF_bndfix!(so,lp) afterwards - confirm
    # that this makes the boundary values irrelevant.
    so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]

    return nothing
end
|
||||
|
||||
"""
    krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D})

CUDA kernel behind `g5Dw!` for open boundary conditions without the clover term.
On sites away from the first and last time slices it builds in `so` the mass and
hopping terms, plus `(ct-1)*si` on the time slices 2 and `lp.iL[4]-1`. Then, on
every site, `so` is replaced by `dmul(Gamma{5}, so) + im*tm*si`.
"""
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)
    if !((it == 1) || (it == lp.iL[end]))

        # Forward (up) and backward (dw) neighbours in each direction.
        bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
        bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
        bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
        bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

        @inbounds begin

            so[b,r] = (4+m0)*si[b,r]

            so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                             th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                             th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                             th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

            # Boundary improvement on the time slices next to the boundaries.
            if (it == 2) || (it == (lp.iL[4]-1))
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # NOTE(review): this line also runs on the boundary time slices, where
    # so[b,r] was not assigned above, and it sits outside @inbounds. The host
    # wrappers visible in this file call SF_bndfix!(so,lp) afterwards - confirm
    # that this makes the boundary values irrelevant.
    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]

    return nothing
end
|
||||
|
||||
"""
|
||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||
|
||||
Applies the operator ``\\gamma_5 D_w`` twice to `si` and stores the result in `so`. This is equivalent to applying the operator ``D_w^\\dagger D_w``.
|
||||
The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
|
||||
"""
|
||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # Open boundary conditions: SF_bndfix! is applied to the input (modified in
    # place), to the intermediate field dws.st and to the output.
    SF_bndfix!(si,lp)

    @timeit "DwdagDw" begin
        if abs(dpar.csw) > 1.0E-10
            # First application, si -> dws.st, with +tm.
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            end
            SF_bndfix!(dws.st,lp)
            # Second application, dws.st -> so, with the twisted mass sign flipped.
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            end
            SF_bndfix!(so,lp)
        else
            # Same two applications without the clover term.
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
            SF_bndfix!(dws.st,lp)
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
            end
            SF_bndfix!(so,lp)
        end
    end

    return nothing
end
|
||||
|
||||
## PERIODIC
|
||||
|
||||
"""
    Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D})

Periodic boundary conditions: launches `krnl_Dwimpr!` (using `dws.csw`) when
`abs(dpar.csw) > 1.0E-10` and `krnl_Dw!` otherwise. `dpar.ct` is not used.
"""
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    @timeit "Dw" begin
        if abs(dpar.csw) > 1.0E-10
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
||||
|
||||
"""
    krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D})

CUDA kernel behind `Dw!` for periodic boundary conditions with the clover term.
On every site it writes to `so` the mass term `(4+m0)*si`, the twisted-mass term
`im*tm*Gamma{5}`, the clover term built from `Fcsw` and minus one half of the
hopping terms.
"""
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Forward (up) and backward (dw) neighbours in each direction.
    bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
    bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
    bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
    bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

    @inbounds begin

        so[b,r] = (4+m0)*si[b,r]+ im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                                             Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) )

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
    end

    return nothing
end
|
||||
|
||||
"""
    krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D})

CUDA kernel behind `Dw!` for periodic boundary conditions without the clover
term. On every site it writes to `so` the mass term `(4+m0)*si`, the
twisted-mass term `im*tm*Gamma{5}` and minus one half of the hopping terms.
"""
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Forward (up) and backward (dw) neighbours in each direction.
    bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
    bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
    bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
    bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

    @inbounds begin

        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
    end

    return nothing
end
|
||||
|
||||
"""
    g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D})

Periodic boundary conditions: launches `krnl_g5Dwimpr!` (using `dws.csw`) when
`abs(dpar.csw) > 1.0E-10` and `krnl_g5Dw!` otherwise. `dpar.ct` is not used.
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    @timeit "g5Dw" begin
        if abs(dpar.csw) > 1.0E-10
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
||||
|
||||
"""
    krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D})

CUDA kernel behind `g5Dw!` for periodic boundary conditions with the clover
term. On every site it builds in `so` the mass, clover and hopping terms and
then replaces `so` by `dmul(Gamma{5}, so) + im*tm*si`.
"""
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Forward (up) and backward (dw) neighbours in each direction.
    bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
    bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
    bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
    bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

    @inbounds begin

        so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) )

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

        so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
    end

    return nothing
end
|
||||
|
||||
"""
    krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D})

CUDA kernel behind `g5Dw!` for periodic boundary conditions without the clover
term. On every site it builds in `so` the mass and hopping terms and then
replaces `so` by `dmul(Gamma{5}, so) + im*tm*si`.
"""
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Forward (up) and backward (dw) neighbours in each direction.
    bu1, ru1 = up((b,r), 1, lp);  bd1, rd1 = dw((b,r), 1, lp)
    bu2, ru2 = up((b,r), 2, lp);  bd2, rd2 = dw((b,r), 2, lp)
    bu3, ru3 = up((b,r), 3, lp);  bd3, rd3 = dw((b,r), 3, lp)
    bu4, ru4 = up((b,r), 4, lp);  bd4, rd4 = dw((b,r), 4, lp)

    @inbounds begin

        so[b,r] = (4+m0)*si[b,r]

        so[b,r] -= 0.5*( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

        so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
    end

    return nothing
end
|
||||
|
||||
"""
    DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D})

Launches two consecutive `g5Dw` kernels for periodic boundary conditions: the
first one acts on `si` with twisted mass `dpar.tm` and writes into the workspace
field `dws.st`; the second one acts on `dws.st` with twisted mass `-dpar.tm` and
writes into `so`. The improved kernels (using `dws.csw`) are selected when
`abs(dpar.csw) > 1.0E-10`. Returns `nothing`.
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    # Decide once whether the kernels including the Csw term are needed.
    clover = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin

        # First application: si -> dws.st, twisted mass +tm.
        @timeit "g5Dw" begin
            CUDA.@sync begin
                if clover
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
                else
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
                end
            end
        end

        # Second application: dws.st -> so, twisted mass -tm.
        @timeit "g5Dw" begin
            CUDA.@sync begin
                if clover
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
                else
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
                end
            end
        end

    end

    return nothing
end
|
||||
|
||||
## SF
|
||||
|
||||
"""
    Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

Schrödinger Functional version. Calls `SF_bndfix!` on `si`, launches the `Dw`
kernel (the improved one, using `dws.csw`, when `abs(dpar.csw) > 1.0E-10`)
writing into `so`, and finally calls `SF_bndfix!` on `so`. Returns `nothing`.
"""
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # Fix the boundary of the input before it is read by the kernel.
    SF_bndfix!(si,lp)

    @timeit "Dw" begin
        CUDA.@sync begin
            if abs(dpar.csw) > 1.0E-10
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            else
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
        end
    end

    # Fix the boundary of the result as well.
    SF_bndfix!(so,lp)

    return nothing
end
|
||||
|
||||
"""
    krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

CUDA kernel for Schrödinger Functional boundary conditions, including the term
built from `Fcsw` and `csw`. Sites with `point_time == 1` are left untouched.
For every other site `(b,r)` it stores in `so[b,r]` the mass term, the twisted
mass term `im*tm*Gamma{5}`, the `Fcsw` term and the hopping term applied to
`si`; at time slices `2` and `lp.iL[4]` it additionally adds `(ct-1.0)*si[b,r]`.
"""
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Time slice of this site; nothing is written on the first slice.
    it = point_time((b,r),lp)
    if it == 1
        return nothing
    end

    # Forward (p) neighbours in the four directions.
    bp1, rp1 = up((b,r), 1, lp)
    bp2, rp2 = up((b,r), 2, lp)
    bp3, rp3 = up((b,r), 3, lp)
    bp4, rp4 = up((b,r), 4, lp)

    # Backward (m) neighbours in the four directions.
    bm1, rm1 = dw((b,r), 1, lp)
    bm2, rm2 = dw((b,r), 2, lp)
    bm3, rm3 = dw((b,r), 3, lp)
    bm4, rm4 = dw((b,r), 4, lp)

    @inbounds begin

        # Diagonal part: mass, twisted mass and Fcsw term (one sum, original order).
        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*(
            Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
            Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])
        )

        # Hopping term; kept as a single sum so the evaluation order is unchanged.
        so[b,r] -= 0.5*(
            th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])
        )

        # Extra ct contribution on the time slices next to the boundaries.
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end

    end

    return nothing
end
|
||||
|
||||
"""
    krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

CUDA kernel for Schrödinger Functional boundary conditions without the `Fcsw`
term. Sites with `point_time == 1` are left untouched. For every other site
`(b,r)` it stores in `so[b,r]` the mass term, the twisted mass term
`im*tm*Gamma{5}` and the hopping term applied to `si`; at time slices `2` and
`lp.iL[4]` it additionally adds `(ct-1.0)*si[b,r]`.
"""
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Time slice of this site; nothing is written on the first slice.
    it = point_time((b,r),lp)
    if it == 1
        return nothing
    end

    # Forward (p) neighbours in the four directions.
    bp1, rp1 = up((b,r), 1, lp)
    bp2, rp2 = up((b,r), 2, lp)
    bp3, rp3 = up((b,r), 3, lp)
    bp4, rp4 = up((b,r), 4, lp)

    # Backward (m) neighbours in the four directions.
    bm1, rm1 = dw((b,r), 1, lp)
    bm2, rm2 = dw((b,r), 2, lp)
    bm3, rm3 = dw((b,r), 3, lp)
    bm4, rm4 = dw((b,r), 4, lp)

    @inbounds begin

        # Diagonal part: mass and twisted mass.
        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])

        # Hopping term; kept as a single sum so the evaluation order is unchanged.
        so[b,r] -= 0.5*(
            th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])
        )

        # Extra ct contribution on the time slices next to the boundaries.
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end

    end

    return nothing
end
|
||||
|
||||
|
||||
"""
    g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

Schrödinger Functional version. Calls `SF_bndfix!` on `si`, launches the `g5Dw`
kernel (the improved one, using `dws.csw`, when `abs(dpar.csw) > 1.0E-10`)
writing into `so`, and finally calls `SF_bndfix!` on `so`. Returns `nothing`.
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # Fix the boundary of the input before it is read by the kernel.
    SF_bndfix!(si,lp)

    @timeit "g5Dw" begin
        CUDA.@sync begin
            if abs(dpar.csw) > 1.0E-10
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            else
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
        end
    end

    # Fix the boundary of the result as well.
    SF_bndfix!(so,lp)

    return nothing
end
|
||||
|
||||
"""
    krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

CUDA kernel for Schrödinger Functional boundary conditions, including the term
built from `Fcsw` and `csw`. For sites with `point_time != 1` it stores in
`so[b,r]` the mass term, the `Fcsw` term and the hopping term applied to `si`,
adding `(ct-1.0)*si[b,r]` at time slices `2` and `lp.iL[4]`. Afterwards, for
every site, `so[b,r]` is replaced by `Gamma{5}*so[b,r] + im*tm*si[b,r]`.
"""
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Time slice of this site.
    it = point_time((b,r),lp)

    if it != 1

        # Forward (p) neighbours in the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)

        # Backward (m) neighbours in the four directions.
        bm1, rm1 = dw((b,r), 1, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        @inbounds begin

            # Diagonal part: mass and Fcsw term (one sum, original order).
            so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*(
                Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])
            )

            # Hopping term; kept as a single sum so the evaluation order is unchanged.
            so[b,r] -= 0.5*(
                th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])
            )

            # Extra ct contribution on the time slices next to the boundaries.
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end

        end

    end

    # NOTE(review): this line also runs for sites with point_time == 1, where
    # so[b,r] has not been set by this kernel. The g5Dw!/DwdagDw! wrappers call
    # SF_bndfix! on the output afterwards, which presumably overwrites those
    # sites -- confirm before moving this line inside the guard.
    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]

    return nothing
end
|
||||
|
||||
"""
    krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

CUDA kernel for Schrödinger Functional boundary conditions without the `Fcsw`
term. For sites with `point_time != 1` it stores in `so[b,r]` the mass term and
the hopping term applied to `si`, adding `(ct-1.0)*si[b,r]` at time slices `2`
and `lp.iL[4]`. Afterwards, for every site, `so[b,r]` is replaced by
`Gamma{5}*so[b,r] + im*tm*si[b,r]`.
"""
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Time slice of this site.
    it = point_time((b,r),lp)

    if it != 1

        # Forward (p) neighbours in the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)

        # Backward (m) neighbours in the four directions.
        bm1, rm1 = dw((b,r), 1, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        @inbounds begin

            # Diagonal (mass) term.
            so[b,r] = (4+m0)*si[b,r]

            # Hopping term; kept as a single sum so the evaluation order is unchanged.
            so[b,r] -= 0.5*(
                th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])
            )

            # Extra ct contribution on the time slices next to the boundaries.
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end

        end

    end

    # NOTE(review): this line also runs for sites with point_time == 1, where
    # so[b,r] has not been set by this kernel. The g5Dw!/DwdagDw! wrappers call
    # SF_bndfix! on the output afterwards, which presumably overwrites those
    # sites -- confirm before moving this line inside the guard.
    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]

    return nothing
end
|
||||
|
||||
"""
    DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

Schrödinger Functional version. Calls `SF_bndfix!` on `si` and then launches two
consecutive `g5Dw` kernels: the first acts on `si` with twisted mass `dpar.tm`
and writes into the workspace field `dws.st`; the second acts on `dws.st` with
twisted mass `-dpar.tm` and writes into `so`. `SF_bndfix!` is called on the
output of each kernel. The improved kernels (using `dws.csw`) are selected when
`abs(dpar.csw) > 1.0E-10`. Returns `nothing`.
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # Fix the boundary of the input before it is read by the kernels.
    SF_bndfix!(si,lp)

    # Decide once whether the kernels including the Csw term are needed.
    clover = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin

        # First application: si -> dws.st, twisted mass +tm.
        @timeit "g5Dw" begin
            CUDA.@sync begin
                if clover
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                else
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                end
            end
        end
        SF_bndfix!(dws.st,lp)

        # Second application: dws.st -> so, twisted mass -tm.
        @timeit "g5Dw" begin
            CUDA.@sync begin
                if clover
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                else
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
                end
            end
        end
        SF_bndfix!(so,lp)

    end

    return nothing
end
|
|
@ -31,7 +31,7 @@ scalar_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 2}(undef, lp.b
|
|||
"""
|
||||
nscalar_field(::Type{T}, n::Integer, lp::SpaceParm)
|
||||
|
||||
Returns `n` scalar fields of elemental type `T`
|
||||
Returns `n` scalar fields of elemental type `T`.
|
||||
"""
|
||||
nscalar_field(::Type{T}, n, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, n, lp.rsz)
|
||||
|
||||
|
@ -46,7 +46,7 @@ scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T,
|
|||
"""
|
||||
tensor_field(::Type{T}, lp::SpaceParm)
|
||||
|
||||
Returns a tensor field of elemental type `T`.
|
||||
Returns a (symmetric) tensor field of elemental type `T`.
|
||||
"""
|
||||
tensor_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.npls, lp.rsz)
|
||||
|
||||
|
|
|
@ -1,12 +1,23 @@
|
|||
|
||||
|
||||
|
||||
"""
|
||||
struct U2alg{T} <: Algebra
|
||||
|
||||
Elements of the `U(2)` Algebra. The type `T <: AbstractFloat` can be used to define single or double precision elements.
|
||||
"""
|
||||
struct U2alg{T} <: Algebra
|
||||
u11::T
|
||||
u22::T
|
||||
u12::Complex{T}
|
||||
end
|
||||
|
||||
|
||||
"""
|
||||
antsym(a::SU2{T}) where T <: AbstractFloat
|
||||
|
||||
Returns the antisymmetrization of the SU2 element `a`, that is ``a - a^{\\dagger}``. This method returns an element of `U2alg{T}`.
|
||||
"""
|
||||
function antsym(a::SU2{T}) where T <: AbstractFloat
|
||||
return U2alg{T}(2.0*imag(a.t1),-2.0*imag(a.t1),2.0*a.t2)
|
||||
end
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
|
||||
|
||||
"""
|
||||
struct U3alg{T} <: Algebra
|
||||
|
||||
Elements of the `U(3)` Algebra. The type `T <: AbstractFloat` can be used to define single or double precision elements.
|
||||
"""
|
||||
struct U3alg{T} <: Algebra
|
||||
u11::T
|
||||
u22::T
|
||||
|
@ -10,6 +14,11 @@ struct U3alg{T} <: Algebra
|
|||
u23::Complex{T}
|
||||
end
|
||||
|
||||
"""
|
||||
antsym(a::SU3{T}) where T <: AbstractFloat
|
||||
|
||||
Returns the antisymmetrization of the SU3 element `a`, that is ``a - a^{\\dagger}``. This method returns an element of `U3alg{T}`.
|
||||
"""
|
||||
function antsym(a::SU3{T}) where T <: AbstractFloat
|
||||
t1 = 2.0*imag(a.u11)
|
||||
t2 = 2.0*imag(a.u22)
|
||||
|
|
|
@ -38,7 +38,7 @@ norm2(a::SU3fund{T}) where T <: AbstractFloat = (abs2(a.t1) + abs2
|
|||
|
||||
Returns the scalar product of two fundamental elements. The convention is for the product to be linear in the second argument, and anti-linear in the first argument.
|
||||
"""
|
||||
dot(g1::SU3fund{T},g2::SU3fund{T}) where T <: AbstractFloat = conj(g1.t1)*g2.t1+g1.t2*conj(g2.t2)+g1.t3*conj(g2.t3)
|
||||
dot(g1::SU3fund{T},g2::SU3fund{T}) where T <: AbstractFloat = conj(g1.t1)*g2.t1+conj(g1.t2)*g2.t2+conj(g1.t3)*g2.t3
|
||||
|
||||
"""
|
||||
*(g::SU3{T},b::SU3fund{T})
|
||||
|
|
|
@ -36,7 +36,7 @@ norm2(a::SU2{T}) where T <: AbstractFloat = abs2(a.t1) + abs2(a.t2)
|
|||
"""
|
||||
tr(g::T) where T <: Group
|
||||
|
||||
Returns the trace of the groups element `g`.
|
||||
Returns the trace of the group element `g`.
|
||||
"""
|
||||
tr(g::SU2{T}) where T <: AbstractFloat = complex(2*real(g.t1), 0.0)
|
||||
|
||||
|
|
|
@ -40,25 +40,27 @@ include("YM/YM.jl")
|
|||
using .YM
|
||||
export ztwist
|
||||
export YMworkspace, GaugeParm, force0_wilson!, field, field_pln, randomize!, zero!, norm2
|
||||
export force_gauge, MD!
|
||||
export gauge_action, hamiltonian, plaquette, HMC!, OMF4!
|
||||
export Eoft_clover, Eoft_plaq, Qtop
|
||||
export FlowIntr, wfl_euler, zfl_euler, wfl_rk2, zfl_rk2, wfl_rk3, zfl_rk3
|
||||
export flw, flw_adapt
|
||||
export sfcoupling, bndfield, setbndfield
|
||||
export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg
|
||||
export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg, read_gp
|
||||
|
||||
include("Spinors/Spinors.jl")
|
||||
|
||||
using .Spinors
|
||||
export Spinor, Pgamma
|
||||
export Spinor, Pgamma, Gamma
|
||||
export imm, mimm
|
||||
export pmul, gpmul, gdagpmul, dmul
|
||||
|
||||
include("Dirac/Dirac.jl")
|
||||
using .Dirac
|
||||
export DiracWorkspace, DiracParam
|
||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!
|
||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
|
||||
export read_prop, save_prop, read_dpar
|
||||
export Nablanabla!, flw, backflow
|
||||
|
||||
include("Solvers/Solvers.jl")
|
||||
using .Solvers
|
||||
|
|
20
src/MD/MD.jl
20
src/MD/MD.jl
|
@ -24,6 +24,11 @@ const r1omf2 = 0.1931833275037836
|
|||
const r2omf2 = 0.5
|
||||
const r3omf2 = 1 - 2*r1omf2
|
||||
|
||||
"""
|
||||
struct IntrScheme{N, T}
|
||||
|
||||
Integrator for the molecular dynamics.
|
||||
"""
|
||||
struct IntrScheme{N, T}
|
||||
r::NTuple{N, T}
|
||||
eps::T
|
||||
|
@ -31,8 +36,23 @@ struct IntrScheme{N, T}
|
|||
end
|
||||
|
||||
|
||||
"""
|
||||
omf2(::Type{T}, eps, ns)
|
||||
|
||||
Second order Omelyan integrator with `eps` stepsize and `ns` steps.
|
||||
"""
|
||||
omf2(::Type{T}, eps, ns) where T = IntrScheme{3,T}((r1omf2,r2omf2,r3omf2), eps, ns)
|
||||
"""
|
||||
omf4(::Type{T}, eps, ns)
|
||||
|
||||
Fourth order Omelyan integrator with `eps` stepsize and `ns` steps.
|
||||
"""
|
||||
omf4(::Type{T}, eps, ns) where T = IntrScheme{6,T}((r1omf4,r2omf4,r3omf4,r4omf4,r5omf4,r6omf4), eps, ns)
|
||||
"""
|
||||
leapfrog(::Type{T}, eps, ns)
|
||||
|
||||
Leapfrog integrator with `eps` stepsize and `ns` steps.
|
||||
"""
|
||||
leapfrog(::Type{T}, eps, ns) where T = IntrScheme{2,T}((0.5,1.0), eps, ns)
|
||||
|
||||
|
||||
|
|
|
@ -9,11 +9,6 @@
|
|||
### created: Tue Nov 30 11:10:57 2021
|
||||
###
|
||||
|
||||
"""
|
||||
function CG!
|
||||
|
||||
Solves the linear equation `Ax = si`
|
||||
"""
|
||||
function krnl_dot!(sum,fone,ftwo)
|
||||
b=Int64(CUDA.threadIdx().x)
|
||||
r=Int64(CUDA.blockIdx().x)
|
||||
|
@ -23,7 +18,7 @@ function krnl_dot!(sum,fone,ftwo)
|
|||
return nothing
|
||||
end
|
||||
|
||||
function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp) where {T}
|
||||
function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp)
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot!(sumf,fone,ftwo)
|
||||
|
@ -32,6 +27,12 @@ function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp) where {T}
|
|||
return sum(sumf)
|
||||
end
|
||||
|
||||
|
||||
"""
|
||||
function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, maxiter::Int64 = 10, tol=1.0)
|
||||
|
||||
Solves the linear equation `Ax = si`
|
||||
"""
|
||||
function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, maxiter::Int64 = 10, tol=1.0) where {T}
|
||||
|
||||
dws.sr .= si
|
||||
|
@ -74,4 +75,4 @@ function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T},
|
|||
end
|
||||
|
||||
return niter
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
function propagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64)
|
||||
|
||||
Saves the fermionic propagator in pro for a source at point `y` with color `c` and spin `s`. If the last three arguments are replaced by `time::Int64`, the source is replaced
|
||||
by a random source in spin and color at t = `time`.
|
||||
by a random source in spin and color at t = `time`. Returns the number of iterations.
|
||||
|
||||
"""
|
||||
function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64) where {T}
|
||||
|
@ -16,19 +16,23 @@ function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Space
|
|||
src[b,r] = dmul(Gamma{5},src[b,r])
|
||||
return nothing
|
||||
end
|
||||
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@allowscalar dws.sp[point_index(CartesianIndex{lp.ndim}(y),lp)...] = Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
|
||||
@timeit "Propagator computation" begin
|
||||
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@allowscalar dws.sp[point_index(CartesianIndex{lp.ndim}(y),lp)...] = Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dws.sp,dpar,dws,lp)
|
||||
|
||||
CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
return nothing
|
||||
|
||||
return niter
|
||||
end
|
||||
|
||||
function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, time::Int64) where {T}
|
||||
|
@ -39,29 +43,30 @@ function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Space
|
|||
src[b,r] = dmul(Gamma{5},src[b,r])
|
||||
return nothing
|
||||
end
|
||||
|
||||
pfrandomize!(dws.sp,lp,time)
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dws.sp,dpar,dws,lp)
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
@timeit "Propagator computation" begin
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
pfrandomize!(dws.sp,lp,time)
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
end
|
||||
|
||||
CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
return nothing
|
||||
|
||||
return niter
|
||||
end
|
||||
|
||||
"""
|
||||
|
||||
function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
|
||||
|
||||
Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
|
||||
For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
|
||||
Saves the propagator from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's' in 'pro'. The factor c_t is included while the factor 1/sqrt(V) is not.
|
||||
For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64). Returns the number of iterations.
|
||||
|
||||
"""
|
||||
function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D}
|
||||
|
@ -78,35 +83,39 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
|
|||
r=Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) == 2)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
src[b,r] = gdagpmul(Pgamma{4,1},U[bd4,4,rd4],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
src[b,r] = gdagpmul(Pgamma{4,1},U[bd4,4,rd4],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
|
||||
@timeit "Propagator computation" begin
|
||||
SF_bndfix!(pro,lp)
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dpar.ct*dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
|
||||
|
||||
CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
return pro
|
||||
return niter
|
||||
end
|
||||
|
||||
"""
|
||||
|
||||
function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
|
||||
function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
|
||||
|
||||
Returns the propagator from the t=T boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
|
||||
For the propagator from t=0 to the bulk, use the function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
|
||||
For the propagator from t=0 to the bulk, use the function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64). Returns the number of iterations.
|
||||
|
||||
"""
|
||||
function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D}
|
||||
|
@ -123,26 +132,29 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S
|
|||
r=Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) == lp.iL[end])
|
||||
src[b,r] = gpmul(Pgamma{4,-1},U[b,4,r],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
|
||||
src[b,r] = gpmul(Pgamma{4,-1},U[b,4,r],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
@timeit "Propagator computation" begin
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
|
||||
g5Dw!(pro,U,dpar.ct*dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
|
||||
|
||||
CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
return pro
|
||||
return niter
|
||||
end
|
||||
|
||||
|
||||
|
|
|
@ -26,19 +26,19 @@ This structure contains information about the lattice being simulated. The param
|
|||
- `N`: The number of dimensions
|
||||
- `M`: The number of planes (i.e. \`\` N(N-1)/2 \`\`)
|
||||
- `B`: The boundary conditions in Euclidean time. Acceptable values are
|
||||
- `BC_PERIODIC`: Periodic boundary conditions
|
||||
- `BC_SF_AFWB`: Schrödinger Funtional Aoki-Frezzoptti-Weisz Choice B.
|
||||
- `BC_SF_ORBI`: Schrödinger Funtional orbifold constructions.
|
||||
- `BC_PERIODIC`: Periodic boundary conditions.
|
||||
- `BC_SF_AFWB`: Schrödinger Functional Aoki-Frezzotti-Weisz Choice B.
|
||||
- `BC_SF_ORBI`: Schrödinger Functional orbifold constructions.
|
||||
- `BC_OPEN`: Open boundary conditions.
|
||||
|
||||
The structure conatins the following components:
|
||||
The structure contains the following components:
|
||||
- `iL`: Tuple containing the lattice length in each dimension.
|
||||
- `plidx`: The directions of each plane
|
||||
- `blk`: The block size in each each dimension
|
||||
- `rbk`: The number of blocks in each dimension
|
||||
- `bsz`: The number of points in each block
|
||||
- `rsz`: The number of blocks in the lattice
|
||||
- `ntw`: The twist tensor in each plane
|
||||
- `plidx`: The directions of each plane.
|
||||
- `blk`: The block size in each dimension.
|
||||
- `rbk`: The number of blocks in each dimension.
|
||||
- `bsz`: The number of points in each block.
|
||||
- `rsz`: The number of blocks in the lattice.
|
||||
- `ntw`: The twist tensor in each plane.
|
||||
"""
|
||||
struct SpaceParm{N,M,B,D}
|
||||
ndim::Int64
|
||||
|
|
|
@ -14,6 +14,7 @@ module Spinors
|
|||
using ..Groups
|
||||
import ..Groups.imm, ..Groups.mimm, ..Groups.norm, ..Groups.norm2, ..Groups.dot
|
||||
|
||||
|
||||
struct Spinor{NS,G}
|
||||
s::NTuple{NS,G}
|
||||
end
|
||||
|
@ -169,7 +170,7 @@ end
|
|||
|
||||
|
||||
"""
|
||||
gpmul(pgamma{N,S}, g::G, a::Spinor) G <: Group
|
||||
gpmul(Pgamma{N,S}, g::G, a::Spinor) G <: Group
|
||||
|
||||
Returns ``g(1+s\\gamma_N)a``
|
||||
"""
|
||||
|
@ -226,7 +227,7 @@ end
|
|||
end
|
||||
|
||||
"""
|
||||
gdagpmul(pgamma{N,S}, g::G, a::Spinor) G <: Group
|
||||
gdagpmul(Pgamma{N,S}, g::G, a::Spinor) G <: Group
|
||||
|
||||
Returns ``g^+ (1+s\\gamma_N)a``
|
||||
"""
|
||||
|
@ -284,33 +285,31 @@ end
|
|||
|
||||
|
||||
# dummy structs for dispatch:
|
||||
# Basis of \\Gamma_n
|
||||
# Basis of \\gamma_n
|
||||
struct Gamma{N}
|
||||
end
|
||||
|
||||
"""
|
||||
dmul(n::Int64, a::Spinor)
|
||||
dmul(Gamma{n}, a::Spinor)
|
||||
|
||||
Returns ``\\Gamma_n a``
|
||||
Returns ``\\gamma_n a``. Indexing for Dirac basis ``\\gamma_n``:
|
||||
|
||||
indexing for Dirac basis ``\\Gamma_n``:
|
||||
|
||||
1 gamma1
|
||||
2 gamma2
|
||||
3 gamma3
|
||||
4 gamma0
|
||||
5 gamma5
|
||||
6 gamma1 gamma5
|
||||
7 gamma2 gamma5
|
||||
8 gamma3 gamma5
|
||||
9 gamma0 gamma5
|
||||
10 sigma01
|
||||
11 sigma02
|
||||
12 sigma03
|
||||
13 sigma21
|
||||
14 sigma32
|
||||
15 sigma31
|
||||
16 identity
|
||||
1 ``\\gamma_1``;
|
||||
2 ``\\gamma_2``;
|
||||
3 ``\\gamma_3``;
|
||||
4 ``\\gamma_0``;
|
||||
5 ``\\gamma_5``;
|
||||
6 ``\\gamma_1 \\gamma_5``;
|
||||
7 ``\\gamma_2 \\gamma_5``;
|
||||
8 ``\\gamma_3 \\gamma_5``;
|
||||
9 ``\\gamma_0 \\gamma_5``;
|
||||
10 ``\\sigma_{01}``;
|
||||
11 ``\\sigma_{02}``;
|
||||
12 ``\\sigma_{03}``;
|
||||
13 ``\\sigma_{21}``;
|
||||
14 ``\\sigma_{32}``;
|
||||
15 ``\\sigma_{31}``;
|
||||
16 identity;
|
||||
|
||||
"""
|
||||
@inline dmul(::Type{Gamma{1}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((mimm(a.s[4]), mimm(a.s[3]), imm(a.s[2]), imm(a.s[1])))
|
||||
|
|
38
src/YM/YM.jl
38
src/YM/YM.jl
|
@ -20,6 +20,19 @@ using ..MD
|
|||
|
||||
import Base.show
|
||||
|
||||
"""
|
||||
struct GaugeParm{T,G,N}
|
||||
|
||||
Structure containing the parameters of a pure gauge simulation. These are:
|
||||
- beta: Type `T`. The bare coupling of the simulation.
|
||||
- c0: Type `T`. LatticeGPU supports the simulation of gauge actions made of 1x1 Wilson Loops and 2x1 Wilson loops. The parameter c0 defines the coefficient on the simulation of the 1x1 loops. Some common choices are:
|
||||
- c0=1: Wilson plaquette action.
|
||||
- c0=5/3: Tree-level improved Lüscher-Weisz action.
|
||||
- c0=3.648: Iwasaki gauge action.
|
||||
- cG: Tuple (`T`, `T`). Boundary improvement parameters.
|
||||
- ng: `Int64`. Rank of the gauge group.
|
||||
- Ubnd: Boundary field for SF boundary conditions.
|
||||
"""
|
||||
struct GaugeParm{T,G,N}
|
||||
beta::T
|
||||
c0::T
|
||||
|
@ -63,6 +76,21 @@ function Base.show(io::IO, gp::GaugeParm{T, G, N}) where {T,G,N}
|
|||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
struct YMworkspace{T}
|
||||
|
||||
Structure containing memory workspace that is reused by different routines in order to avoid allocating/deallocating time.
|
||||
The parameter `T` represents the precision of the simulation (i.e. single/double). The structure contains the following components
|
||||
- GRP: Group being simulated.
|
||||
- ALG: Corresponding Algebra.
|
||||
- PRC: Precision (i.e. `T`).
|
||||
- frc1: Algebra field with natural indexing.
|
||||
- frc2: Algebra field with natural indexing.
|
||||
- mom: Algebra field with natural indexing.
|
||||
- U1: Group field with natural indexing.
|
||||
- cm: Complex field with lexicographic indexing.
|
||||
- rm: Real field with lexicographic indexing.
|
||||
"""
|
||||
struct YMworkspace{T}
|
||||
GRP
|
||||
ALG
|
||||
|
@ -110,7 +138,11 @@ function Base.show(io::IO, ymws::YMworkspace)
|
|||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
function ztwist(gp::GaugeParm{T,G}, lp::SpaceParm{N,M,B,D}[, ipl])
|
||||
|
||||
Returns the twist factor. If a plane index is passed, returns the twist factor as a Complex{T}. If this is not provided, returns a tuple, containing the factor of each plane.
|
||||
"""
|
||||
function ztwist(gp::GaugeParm{T,G}, lp::SpaceParm{N,M,B,D}) where {T,G,N,M,B,D}
|
||||
|
||||
function plnf(ipl)
|
||||
|
@ -133,10 +165,10 @@ include("YMfields.jl")
|
|||
export randomize!, zero!, norm2
|
||||
|
||||
include("YMact.jl")
|
||||
export krnl_plaq!, force0_wilson!
|
||||
export krnl_plaq!, force_gauge, force_wilson
|
||||
|
||||
include("YMhmc.jl")
|
||||
export gauge_action, hamiltonian, plaquette, HMC!, OMF4!
|
||||
export gauge_action, hamiltonian, plaquette, HMC!, MD!
|
||||
|
||||
include("YMflow.jl")
|
||||
export FlowIntr, flw, flw_adapt
|
||||
|
@ -147,6 +179,6 @@ include("YMsf.jl")
|
|||
export sfcoupling, bndfield, setbndfield
|
||||
|
||||
include("YMio.jl")
|
||||
export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg
|
||||
export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg, read_gp
|
||||
|
||||
end
|
||||
|
|
942
src/YM/YMact.jl
942
src/YM/YMact.jl
File diff suppressed because it is too large
Load diff
|
@ -9,8 +9,14 @@
|
|||
### created: Thu Jul 15 15:16:47 2021
|
||||
###
|
||||
|
||||
function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng())
|
||||
|
||||
"""
|
||||
function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng())
|
||||
|
||||
Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
|
||||
"""
|
||||
function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng())
|
||||
|
||||
if ymws.ALG == SU2alg
|
||||
@timeit "Randomize SU(2) algebra field" begin
|
||||
m = Random.randn(curng, ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
|
||||
|
@ -49,31 +55,44 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI
|
|||
return nothing
|
||||
end
|
||||
|
||||
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
||||
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
|
||||
|
||||
@inbounds begin
|
||||
b = Int64(CUDA.threadIdx().x)
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
for id in 1:lp.ndim
|
||||
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||
m[b,id,7,r], m[b,id,8,r])
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||
|
||||
@inbounds begin
|
||||
b = Int64(CUDA.threadIdx().x)
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
it = point_time((b,r), lp)
|
||||
|
||||
if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI))
|
||||
if it == 1
|
||||
for id in 1:lp.ndim-1
|
||||
frc[b,id,r] = zero(T)
|
||||
end
|
||||
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
||||
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
||||
m[b,N,7,r], m[b,N,8,r])
|
||||
else
|
||||
for id in 1:lp.ndim
|
||||
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||
m[b,id,7,r], m[b,id,8,r])
|
||||
end
|
||||
if it == 1
|
||||
for id in 1:lp.ndim-1
|
||||
frc[b,id,r] = zero(T)
|
||||
end
|
||||
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
||||
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
||||
m[b,N,7,r], m[b,N,8,r])
|
||||
else
|
||||
for id in 1:lp.ndim
|
||||
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||
m[b,id,7,r], m[b,id,8,r])
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
|
116
src/YM/YMflow.jl
116
src/YM/YMflow.jl
|
@ -10,6 +10,11 @@
|
|||
###
|
||||
|
||||
|
||||
"""
|
||||
struct FlowIntr{N,T}
|
||||
|
||||
Structure containing info about a particular flow integrator
|
||||
"""
|
||||
struct FlowIntr{N,T}
|
||||
r::T
|
||||
e0::NTuple{N,T}
|
||||
|
@ -26,11 +31,46 @@ struct FlowIntr{N,T}
|
|||
end
|
||||
|
||||
# pre-defined integrators
|
||||
"""
|
||||
wfl_euler(::Type{T}, eps::T, tol::T)
|
||||
|
||||
Euler scheme integrator for the Wilson Flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`.
|
||||
"""
|
||||
function wfl_euler(::Type{T}, eps::T, tol::T) where T
    unit = one(T)
    # Zero-stage (N = 0) integrator: both coefficient tuples are empty.
    return FlowIntr{0,T}(unit, (), (), false, unit, eps, tol, unit/200, unit/10, 9/10)
end
|
||||
|
||||
"""
|
||||
zfl_euler(::Type{T}, eps::T, tol::T)
|
||||
|
||||
Euler scheme integrator for the Zeuthen flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`.
|
||||
"""
|
||||
function zfl_euler(::Type{T}, eps::T, tol::T) where T
    unit = one(T)
    # Same as the Wilson-flow Euler scheme, but with the boolean flag set to
    # `true` and the factor 5/3 in place of 1.
    return FlowIntr{0,T}(unit, (), (), true, (unit*5)/3, eps, tol, unit/200, unit/10, 9/10)
end
|
||||
|
||||
"""
|
||||
wfl_rk2(::Type{T}, eps::T, tol::T)
|
||||
|
||||
Second order Runge-Kutta integrator for the Wilson flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`.
|
||||
"""
|
||||
function wfl_rk2(::Type{T}, eps::T, tol::T) where T
    unit = one(T)
    half = unit/2
    # One extra stage (N = 1): a single entry in each coefficient tuple.
    return FlowIntr{1,T}(half, (-half,), (unit,), false, unit, eps, tol, unit/200, unit/10, 9/10)
end
|
||||
|
||||
"""
|
||||
zfl_rk2(::Type{T}, eps::T, tol::T)
|
||||
|
||||
Second order Runge-Kutta integrator for the Zeuthen flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`.
|
||||
"""
|
||||
function zfl_rk2(::Type{T}, eps::T, tol::T) where T
    unit = one(T)
    half = unit/2
    # Same coefficients as `wfl_rk2`, with the boolean flag set to `true` and
    # the factor 5/3 in place of 1.
    return FlowIntr{1,T}(half, (-half,), (unit,), true, (unit*5)/3, eps, tol, unit/200, unit/10, 9/10)
end
|
||||
|
||||
"""
|
||||
wfl_rk3(::Type{T}, eps::T, tol::T)
|
||||
|
||||
Third order Runge-Kutta integrator for the Wilson flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`.
|
||||
"""
|
||||
function wfl_rk3(::Type{T}, eps::T, tol::T) where T
    unit = one(T)
    # Two extra stages (N = 2): two entries in each coefficient tuple.
    return FlowIntr{2,T}(unit/4, (-17/36, -unit), (8/9, 3/4), false, unit,
                         eps, tol, unit/200, unit/10, 9/10)
end
|
||||
|
||||
"""
|
||||
zfl_rk3(::Type{T}, eps::T, tol::T)
|
||||
|
||||
Third order Runge-Kutta integrator for the Zeuthen flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`.
|
||||
"""
|
||||
function zfl_rk3(::Type{T}, eps::T, tol::T) where T
    unit = one(T)
    # Same coefficients as `wfl_rk3`, with the boolean flag set to `true` and
    # the factor 5/3 in place of 1.
    return FlowIntr{2,T}(unit/4, (-17/36, -unit), (8/9, 3/4), true, (unit*5)/3,
                         eps, tol, unit/200, unit/10, 9/10)
end
|
||||
|
||||
function Base.show(io::IO, int::FlowIntr{N,T}) where {N,T}
|
||||
|
@ -94,7 +134,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
|
|||
r = Int64(CUDA.blockIdx().x)
|
||||
it = point_time((b, r), lp)
|
||||
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
||||
OBC = (B == BC_OPEN)
|
||||
|
||||
@inbounds for id in 1:N
|
||||
bu, ru = up((b,r), id, lp)
|
||||
|
@ -112,16 +153,29 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
|
|||
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||
end
|
||||
else
|
||||
end
|
||||
if OBC
|
||||
if (it > 1) && (it < lp.iL[end])
|
||||
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||
elseif ((it == lp.iL[end]) || (it == 1)) && (id < N)
|
||||
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||
end
|
||||
else
|
||||
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
function flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)
|
||||
|
||||
Integrates the flow equations with the integration scheme defined by `int` performing `ns` steps with fixed step size. The configuration `U` is overwritten.
|
||||
"""
|
||||
function flw(U, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T}
|
||||
@timeit "Integrating flow equations" begin
|
||||
for i in 1:ns
|
||||
|
@ -152,21 +206,28 @@ flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMwor
|
|||
# Adaptive step size integrators
|
||||
##
|
||||
|
||||
"""
|
||||
function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)
|
||||
|
||||
Integrates the flow equations with the integration scheme defined by `int` using the adaptive step size integrator up to `tend` with the tolerance defined in `int`. The configuration `U` is overwritten.
|
||||
"""
|
||||
function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T}
|
||||
|
||||
eps = int.eps_ini
|
||||
eps = epsini
|
||||
dt = tend
|
||||
nstp = 0
|
||||
eps_all = Vector{T}(undef,0)
|
||||
while true
|
||||
ns = convert(Int64, floor(dt/eps))
|
||||
if ns > 10
|
||||
flw(U, int, 9, eps, gp, lp, ymws)
|
||||
ymws.U1 .= U
|
||||
flw(U, int, 2, eps/2, gp, lp, ymws)
|
||||
flw(ymws.U1, int, 1, eps, gp, lp, ymws)
|
||||
flw(U, int, 1, eps, gp, lp, ymws)
|
||||
flw(ymws.U1, int, 2, eps/2, gp, lp, ymws)
|
||||
|
||||
dt = dt - 10*eps
|
||||
nstp = nstp + 10
|
||||
push!(eps_all,ntuple(i->eps,10)...)
|
||||
|
||||
# adjust step size
|
||||
ymws.U1 .= ymws.U1 ./ U
|
||||
|
@ -177,6 +238,9 @@ function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp
|
|||
flw(U, int, ns, eps, gp, lp, ymws)
|
||||
dt = dt - ns*eps
|
||||
|
||||
push!(eps_all,ntuple(i->eps,ns)...)
|
||||
push!(eps_all,dt)
|
||||
|
||||
flw(U, int, 1, dt, gp, lp, ymws)
|
||||
dt = zero(tend)
|
||||
|
||||
|
@ -188,7 +252,7 @@ function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp
|
|||
end
|
||||
end
|
||||
|
||||
return nstp, eps
|
||||
return nstp, eps_all
|
||||
end
|
||||
flw_adapt(U, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T} = flw_adapt(U, int, tend, int.eps_ini, gp, lp, ymws)
|
||||
|
||||
|
@ -201,7 +265,7 @@ flw_adapt(U, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, lp::SpaceParm, ymws::Y
|
|||
"""
|
||||
function Eoft_plaq([Eslc,] U, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)
|
||||
|
||||
Measure the action density `E(t)` using the plaquette discretization. If the argument `Eslc`
|
||||
Measure the action density `E(t)` using the plaquette discretization. If the argument `Eslc` is given
|
||||
the contribution for each Euclidean time slice and plane are returned.
|
||||
"""
|
||||
function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws::YMworkspace) where {T,G,NN,N,M,B,D}
|
||||
|
@ -209,7 +273,8 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
|
|||
@timeit "E(t) plaquette measurement" begin
|
||||
|
||||
ztw = ztwist(gp, lp)
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
||||
OBC = (B == BC_OPEN)
|
||||
|
||||
tp = ntuple(i->i, N-1)
|
||||
V3 = prod(lp.iL[1:end-1])
|
||||
|
@ -230,6 +295,10 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
|
|||
if !SFBC
|
||||
Eslc[1,ipl] = Etmp[1] + Etmp[end]
|
||||
end
|
||||
if OBC ## Check normalization of timelike boundary plaquettes
|
||||
Eslc[end,ipl] = Etmp[end-1]
|
||||
Eslc[1,ipl] = Etmp[1]
|
||||
end
|
||||
else
|
||||
for it in 1:lp.iL[end]
|
||||
Eslc[it,ipl] = 2*Etmp[it]
|
||||
|
@ -254,7 +323,7 @@ function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{
|
|||
I = point_coord((b,r), lp)
|
||||
|
||||
id1, id2 = lp.plidx[ipl]
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI)) && (id1 == lp.iL[end])
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI)) && (id1 == N)
|
||||
TWP = ((I[id1]==1)&&(I[id2]==1))
|
||||
|
||||
bu1, ru1 = up((b, r), id1, lp)
|
||||
|
@ -272,15 +341,13 @@ function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{
|
|||
plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
Qtop([Qslc,] U, lp, ymws)
|
||||
Qtop([Qslc,] U, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)
|
||||
|
||||
Measure the topological charge `Q` of the configuration `U`. If the argument `Qslc` is present
|
||||
the contribution for each Euclidean time slice are returned.
|
||||
Measure the topological charge `Q` of the configuration `U` using the clover definition of the field strength tensor. If the argument `Qslc` is present the contributions for each Euclidean time slice are returned. Only works in 4D.
|
||||
"""
|
||||
function Qtop(Qslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace) where {M,B,D}
|
||||
|
||||
|
@ -296,21 +363,18 @@ function Qtop(Qslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace)
|
|||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 2,4, ztw[2], ztw[4], lp)
|
||||
end
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, +, ymws.frc1, ymws.frc2, lp)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 3,6, ztw[3], ztw[6], lp)
|
||||
end
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
||||
end
|
||||
|
||||
Qslc .= reshape(Array(CUDA.reduce(+, ymws.rm; dims=tp)),lp.iL[end])./(32*pi^2)
|
||||
end
|
||||
|
||||
|
@ -322,7 +386,7 @@ Qtop(U, gp::GaugeParm, lp::SpaceParm{4,M,D}, ymws::YMworkspace{T}) where {T,M,D}
|
|||
"""
|
||||
function Eoft_clover([Eslc,] U, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)
|
||||
|
||||
Measure the action density `E(t)` using the clover discretization. If the argument `Eslc`
|
||||
Measure the action density `E(t)` using the clover discretization. If the argument `Eslc` is given
|
||||
the contribution for each Euclidean time slice and plane are returned.
|
||||
"""
|
||||
function Eoft_clover(Eslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace{T}) where {T,M,B,D}
|
||||
|
@ -391,7 +455,7 @@ function krnl_add_et!(rm, frc1, lp::SpaceParm{4,M,B,D}) where {M,B,D}
|
|||
I = point_coord((b,r), lp)
|
||||
rm[I] = dot(X1,X1)
|
||||
end
|
||||
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
@ -420,6 +484,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
|||
#First plane
|
||||
id1, id2 = lp.plidx[ipl1]
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
||||
OBC = ((B == BC_OPEN) && (id1 == 4))
|
||||
TWP = ((I[id1]==1)&&(I[id2]==1))
|
||||
|
||||
bu1, ru1 = up((b, r), id1, lp)
|
||||
|
@ -439,6 +504,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
|||
frc1[bu1,2,ru1] = zero(TA)
|
||||
frc1[bd,3,rd] = zero(TA)
|
||||
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||
elseif OBC && (it == lp.iL[end])
|
||||
frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r])
|
||||
frc1[bu1,2,ru1] = zero(TA)
|
||||
frc1[bd,3,rd] = zero(TA)
|
||||
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||
else
|
||||
if TWP
|
||||
frc1[b,1,r] = projalg(ztw1, U[b,id1,r]*l1/U[b,id2,r])
|
||||
|
@ -456,6 +526,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
|||
# Second plane
|
||||
id1, id2 = lp.plidx[ipl2]
|
||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
||||
OBC = ((B == BC_OPEN) && (id1 == 4))
|
||||
TWP = ((I[id1]==1)&&(I[id2]==1))
|
||||
|
||||
bu1, ru1 = up((b, r), id1, lp)
|
||||
|
@ -475,6 +546,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
|||
frc2[bu1,2,ru1] = zero(TA)
|
||||
frc2[bd,3,rd] = zero(TA)
|
||||
frc2[bu2,4,ru2] = projalg(l2*l1)
|
||||
elseif OBC && (it == lp.iL[end])
|
||||
frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r])
|
||||
frc1[bu1,2,ru1] = zero(TA)
|
||||
frc1[bd,3,rd] = zero(TA)
|
||||
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||
else
|
||||
if TWP
|
||||
frc2[b,1,r] = projalg(ztw2, U[b,id1,r]*l1/U[b,id2,r])
|
||||
|
@ -489,7 +565,5 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
function gauge_action(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
|
||||
|
||||
Returns the value of the gauge plaquette action for the configuration U. The parameters `\beta` and `c0` are taken from the `gp` structure.
|
||||
Returns the value of the gauge action for the configuration U. The parameters ``\\beta`` and `c0` are taken from the `gp` structure.
|
||||
"""
|
||||
function gauge_action(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}) where T <: AbstractFloat
|
||||
|
||||
|
@ -37,6 +37,11 @@ function gauge_action(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}) whe
|
|||
return S
|
||||
end
|
||||
|
||||
"""
|
||||
function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
|
||||
|
||||
Computes the average plaquette for the configuration `U`.
|
||||
"""
|
||||
function plaquette(U, lp::SpaceParm{N,M,B,D}, gp::GaugeParm, ymws::YMworkspace) where {N,M,B,D}
|
||||
|
||||
ztw = ztwist(gp, lp)
|
||||
|
@ -48,7 +53,12 @@ function plaquette(U, lp::SpaceParm{N,M,B,D}, gp::GaugeParm, ymws::YMworkspace)
|
|||
|
||||
return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls)
|
||||
end
|
||||
|
||||
|
||||
"""
|
||||
function hamiltonian(mom, U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
|
||||
|
||||
Returns the Energy ``H = \\frac{p^2}{2}+S[U]``, where the momenta field is given by `mom` and the configuration by `U`.
|
||||
"""
|
||||
function hamiltonian(mom, U, lp, gp, ymws)
|
||||
@timeit "Computing Hamiltonian" begin
|
||||
K = CUDA.mapreduce(norm2, +, mom)/2
|
||||
|
@ -58,6 +68,12 @@ function hamiltonian(mom, U, lp, gp, ymws)
|
|||
return K+V
|
||||
end
|
||||
|
||||
|
||||
"""
|
||||
HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng())
|
||||
|
||||
Performs an HMC step (molecular dynamics integration followed by the accept/reject step). The configuration `U` is updated, and the function returns a tuple containing the energy violation and whether the configuration was accepted.
|
||||
"""
|
||||
function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng()) where T
|
||||
|
||||
@timeit "HMC trayectory" begin
|
||||
|
@ -92,6 +108,11 @@ function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspac
|
|||
end
|
||||
HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng()) where T = HMC!(U, omf4(T, eps, ns), lp, gp, ymws; noacc=noacc, rng, curng)
|
||||
|
||||
"""
|
||||
function MD!(mom, U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
|
||||
|
||||
Performs the integration of a molecular dynamics trajectory starting from the momentum field `mom` and the configuration `U` according to the integrator described by `int`.
|
||||
"""
|
||||
function MD!(mom, U, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}) where {NI, T <: AbstractFloat}
|
||||
|
||||
@timeit "MD evolution" begin
|
||||
|
|
|
@ -75,7 +75,7 @@ function read_cnfg(fname::String)
|
|||
end
|
||||
|
||||
if ibc == BC_SF_AFWB || ibc == BC_SF_ORBI
|
||||
BDIO_read(fb, V)
|
||||
BDIO_read(fb, vec(V))
|
||||
Ubnd = ntuple(i->assign(i, V, 1), 3)
|
||||
BDIO_close!(fb)
|
||||
|
||||
|
@ -297,3 +297,50 @@ function import_cern64(fname, ibc, lp::SpaceParm; log=true)
|
|||
|
||||
return CuArray(Ucpu)
|
||||
end
|
||||
|
||||
|
||||
|
||||
"""
    read_gp(fname::String)

Read the gauge and lattice parameters stored in the file `fname` (native BDIO
format). Only the header and parameter records are read.

Returns the tuple `(gp, lp)`: `gp` is a `GaugeParm{Float64}` for `SU3{Float64}`
built from the first two stored floating point parameters, and `lp` is the
`SpaceParm` built from the stored dimension, lattice size, boundary condition
and twist.

Throws an error if the header does not contain the expected magic number and
format version. The file is closed before returning, also on error.
"""
function read_gp(fname::String)

    UID_HDR = 14
    fb = BDIO_open(fname, "r")
    try
        # Move to the header record.
        while BDIO_get_uinfo(fb) != UID_HDR
            BDIO_seek!(fb)
        end
        ihdr = Vector{Int32}(undef, 2)
        BDIO_read(fb, ihdr)
        # Reject the file if EITHER the magic number or the version is wrong.
        if (ihdr[1] != convert(Int32, 1653996111)) || (ihdr[2] != convert(Int32, 2))
            error("Wrong file format [header]")
        end

        # The stored string is not used; it is still read so that the sequence
        # of reads on the file is unchanged.
        BDIO.BDIO_read_str(fb)

        # Move to the parameter record (user info 1).
        while BDIO_get_uinfo(fb) != 1
            BDIO_seek!(fb)
        end

        ifoo = Vector{Int32}(undef, 4)
        BDIO_read(fb, ifoo)
        ndim = convert(Int64, ifoo[1])
        npls = convert(Int64, round(ndim*(ndim-1)/2))
        ibc  = convert(Int64, ifoo[2])

        # Lattice extents (ndim entries) followed by one twist per plane (npls entries).
        ifoo = Vector{Int32}(undef, ndim+npls)
        BDIO_read(fb, ifoo)
        iL  = ntuple(i -> convert(Int64, ifoo[i]), ndim)
        ntw = ntuple(i -> convert(Int64, ifoo[i+ndim]), npls)

        dfoo = Vector{Float64}(undef, 4)
        BDIO_read(fb, dfoo)

        # Block size 4 in every direction; identical to (4,4,4,4) when ndim == 4.
        lp = SpaceParm{ndim}(iL, ntuple(i -> 4, ndim), ibc, ntw)
        # NOTE(review): dfoo[1] and dfoo[2] are presumably beta and c0 -- confirm
        # against the writer (save_cnfg).
        gp = GaugeParm{Float64}(SU3{Float64}, dfoo[1], dfoo[2])

        return gp, lp
    finally
        BDIO_close!(fb)
    end
end
|
||||
|
|
|
@ -10,9 +10,9 @@
|
|||
###
|
||||
|
||||
"""
|
||||
sfcoupling(U, lp::SpaceParm{N,M,B,D}, gp::GaugeParm, ymws::YMworkspace) where {N,M,B,D}
|
||||
sfcoupling(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
|
||||
|
||||
Measures the Schrodinger Functional coupling `ds/d\eta` and `d^2S/d\eta d\nu`.
|
||||
Measures the Schrodinger Functional coupling ``{\\rm d}S/{\\rm d}\\eta`` and ``{\\rm d}^2S/{\\rm d}\\eta {\\rm d}\\nu``.
|
||||
"""
|
||||
function sfcoupling(U, lp::SpaceParm{N,M,B,D}, gp::GaugeParm, ymws::YMworkspace) where {N,M,B,D}
|
||||
|
||||
|
@ -89,7 +89,11 @@ end
|
|||
return exp(X)
|
||||
end
|
||||
|
||||
"""
|
||||
function setbndfield(U, phi, lp::SpaceParm)
|
||||
|
||||
Sets abelian boundary fields with phases `phi[1]` and `phi[2]` to the configuration `U` at time slice ``x_0=0``.
|
||||
"""
|
||||
function setbndfield(U, phi, lp::SpaceParm{N,M,B,D}) where {N,M,B,D}
|
||||
|
||||
CUDA.@sync begin
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue