diff --git a/src/Dirac/Diracflow.jl b/src/Dirac/Diracflow.jl index 575cda8..cfd2c65 100644 --- a/src/Dirac/Diracflow.jl +++ b/src/Dirac/Diracflow.jl @@ -18,7 +18,7 @@ Integrates the flow equations with the integration scheme defined by `int` perfo function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} @timeit "Integrating flow equations" begin for i in 1:ns - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -31,7 +31,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D U .= expm.(U, ymws.mom, 2*eps*int.r) for k in 1:NI - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -51,6 +51,43 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D end flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, int.eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) +function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T,N,M,D} + @timeit "Integrating flow equations" begin + for i in 1:ns + force_gauge(ymws, U, int.c0, 1, gp, lp) + + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + Nablanabla!(dws.sAp, U, psi, dpar, dws, lp) + psi .= psi + 2*int.r*eps*dws.sAp + + ymws.mom .= ymws.frc1 + U .= expm.(U, ymws.mom, 2*eps*int.r) + + for k in 1:NI + force_gauge(ymws, U, int.c0, 1, gp, lp) + + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + Nablanabla!(dws.sp, U, psi, dpar, dws, lp) + dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp + psi .= psi + 2*eps*dws.sAp + + ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1 + U .= expm.(U, ymws.mom, 2*eps) + end + end + end + + return nothing +end + """ function backflow(psi, U, Dt, nsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) @@ -122,7 +159,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam @timeit "GPU to CPU" V = Array(U) - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -131,7 +168,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam ymws.mom .= ymws.frc1 U .= expm.(U, ymws.mom, 2*eps*int.r) - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -144,7 +181,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam @timeit "CPU to GPU" copyto!(U,V) - force_gauge_flw(ymws, U, int.c0, 1, gp, lp) + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) @@ -165,6 +202,54 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam return nothing end +function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace, dws::DiracWorkspace) where {N,M,D} + + @timeit "Backflow step" begin + + @timeit "GPU to CPU" V = Array(U) + + force_gauge(ymws, U, int.c0, 1, gp, lp) + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + ymws.mom .= ymws.frc1 + U .= expm.(U, ymws.mom, 2*eps*int.r) + + force_gaugew(ymws, U, int.c0, 1, gp, lp) + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + ymws.mom .= int.e0[1].*ymws.mom .+ int.e1[1].*ymws.frc1 + U .= expm.(U, ymws.mom, 2*eps) + + Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp) + + @timeit "CPU to GPU" copyto!(U,V) + + force_gauge(ymws, U, int.c0, 1, gp, lp) + if int.add_zth + add_zth_term(ymws::YMworkspace, U, lp) + end + rescale_bnd(ymws, lp) + + U .= expm.(U, ymws.frc1, 2*eps*int.r) + + Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp) + dws.sAp .= psi + (8/9)*dws.sAp + + @timeit "CPU to GPU" copyto!(U,V) + + Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp) + psi .= (1/4)*psi + dws.sp + dws.sAp + + end + + return nothing +end """ flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) diff --git a/src/YM/YMact.jl b/src/YM/YMact.jl index 272fe91..fa6fdae 100644 --- a/src/YM/YMact.jl +++ b/src/YM/YMact.jl @@ -1189,7 +1189,6 @@ function force_gauge_flw(ymws::YMworkspace, U, c0, cG, gp::GaugeParm, lp::SpaceP return nothing end - """ function force_wilson(ymws::YMworkspace, U, gp::GaugeParm, lp::SpaceParm) diff --git a/src/YM/YMflow.jl b/src/YM/YMflow.jl index 62abde1..dbcbf4c 100644 --- a/src/YM/YMflow.jl +++ b/src/YM/YMflow.jl @@ -142,7 +142,63 @@ function rescale_bnd(ymws::YMworkspace, lp) return nothing end +function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::SpaceParm{N,M,BC_OPEN,D}) where {TA,TG,N,M,D} + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + it = point_time((b, r), lp) + + @inbounds for id in 1:N + bu, ru = up((b,r), id, lp) + bd, rd = dw((b,r), id, lp) + + X = frc[bu,id,ru] + Y = frc[bd,id,rd] + Ud = U[bd,id,rd] + + if (id < N) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + elseif (it > 1) && (it < (lp.iL[end]-1)) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + elseif (it == 1) || (it == (lp.iL[end]-1)) + frc2[b,id,r] = frc[b,id,r] + end + end + end + return nothing +end + +function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {TA,TG,N,M,D} + + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + it = point_time((b, r), lp) + + @inbounds for id in 1:N + bu, ru = up((b,r), id, lp) + bd, rd = dw((b,r), id, lp) + + X = frc[bu,id,ru] + Y = frc[bd,id,rd] + Ud = U[bd,id,rd] + + if (it > 1) && (it < lp.iL[end]) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + elseif (it == lp.iL[end]) && (id < N) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + else + frc2[b,id,r] = frc[b,id,r] + end + end + end + return nothing +end function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::SpaceParm{N,M,B,D}) where {TA,TG,N,M,B,D} @inbounds begin @@ -150,9 +206,6 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S r = Int64(CUDA.blockIdx().x) it = point_time((b, r), lp) - SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) - OBC = (B == BC_OPEN) - @inbounds for id in 1:N bu, ru = up((b,r), id, lp) bd, rd = dw((b,r), id, lp) @@ -161,29 +214,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S Y = frc[bd,id,rd] Ud = U[bd,id,rd] - if SFBC - if (it > 1) && (it < lp.iL[end]) - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - elseif (it == lp.iL[end]) && (id < N) - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - end - end - if OBC - if (id < N) - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - elseif (it > 1) && (it < (lp.iL[end]-1)) - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - elseif (it == 1) || (it == (lp.iL[end]-1)) - frc2[b,id,r] = frc[b,id,r] - end - else - frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + - projalg(U[b,id,r]*X/U[b,id,r])) - end + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) end end return nothing @@ -224,11 +256,13 @@ flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMwor function flw(U, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, lp::SpaceParm{N,M,BC_OPEN,D}, ymws::YMworkspace) where {NI,T,N,M,D} @timeit "Integrating flow equations" begin for i in 1:ns + force_gauge(ymws, U, int.c0, 1, gp, lp) if int.add_zth add_zth_term(ymws::YMworkspace, U, lp) end rescale_bnd(ymws, lp) + ymws.mom .= ymws.frc1 U .= expm.(U, ymws.mom, 2*eps*int.r)