Merge branch 'master' of igit.ific.uv.es:alramos/latticegpu.jl

2025-07-01 22:39:27 +02:00 · 2025-02-25 16:58:16 +01:00 · 2025-02-25 16:58:16 +01:00 · 3c59b9251a
commit 3c59b9251a
parent 830163e90f 94f410b51f
41 changed files with 3467 additions and 902 deletions
--- a/src/YM/YM.jl
+++ b/src/YM/YM.jl
@ -20,6 +20,19 @@ using ..MD

 import Base.show

+"""
+        struct GaugeParm{T,G,N}
+
+Structure containing the parameters of a pure gauge simulation. These are:
+- beta: Type `T`. The bare coupling of the simulation.
+- c0: Type `T`. LatticeGPU supports the simulation of gauge actions made of 1x1 Wilson Loops and 2x1 Wilson loops. The parameter c0 defines the coefficient on the simulation of the 1x1 loops. Some common choices are:
+  - c0=1: Wilson plaquette action.
+  - c0=5/3: Tree-level improved Lüscher-Weisz action.
+  - c0=3.648: Iwasaki gauge action.
+- cG: Tuple (`T`, `T`). Boundary improvement parameters.
+- ng: `Int64`. Rank of the gauge group.
+- Ubnd: Boundary field for SF boundary conditions.
+"""
 struct GaugeParm{T,G,N}
    beta::T
    c0::T
@ -63,6 +76,21 @@ function Base.show(io::IO, gp::GaugeParm{T, G, N}) where {T,G,N}
    return nothing
 end

+"""
+        struct YMworkspace{T}
+
+Structure containing memory workspace that is reused by different routines in order to avoid allocating/deallocating time.
+The parameter `T` represents the precision of the simulation (i.e. single/double). The structure contains the following components
+- GRP: Group being simulated.
+- ALG: Corresponding Algebra.
+- PRC: Precision (i.e. `T`).
+- frc1: Algebra field with natural indexing.
+- frc2: Algebra field with natural indexing.
+- mom:  Algebra field with natural indexing.
+- U1:   Group field with natural indexing.
+- cm:   Complex field with lexicographic indexing.
+- rm:   Real field with lexicographic indexing.
+"""
 struct YMworkspace{T}
    GRP
    ALG
@ -110,7 +138,11 @@ function Base.show(io::IO, ymws::YMworkspace)
    return nothing
 end

+"""
+        function ztwist(gp::GaugeParm{T,G}, lp::SpaceParm{N,M,B,D}[, ipl])

+Returns the twist factor. If a plane index is passed, returns the twist factor as a Complex{T}. If this is not provided, returns a tuple, containing the factor of each plane.
+"""
 function ztwist(gp::GaugeParm{T,G}, lp::SpaceParm{N,M,B,D}) where {T,G,N,M,B,D}

    function plnf(ipl)
@ -133,10 +165,10 @@ include("YMfields.jl")
 export randomize!, zero!, norm2

 include("YMact.jl")
-export krnl_plaq!, force0_wilson!
+export krnl_plaq!, force_gauge, force_wilson

 include("YMhmc.jl")
-export gauge_action, hamiltonian, plaquette, HMC!, OMF4!
+export gauge_action, hamiltonian, plaquette, HMC!, MD!

 include("YMflow.jl")
 export FlowIntr, flw, flw_adapt
@ -147,6 +179,6 @@ include("YMsf.jl")
 export sfcoupling, bndfield, setbndfield

 include("YMio.jl")
-export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg
+export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg, read_gp

 end
--- a/src/YM/YMact.jl
+++ b/src/YM/YMact.jl
--- a/src/YM/YMfields.jl
+++ b/src/YM/YMfields.jl
@ -9,8 +9,14 @@
 ### created: Thu Jul 15 15:16:47 2021
 ###                               

-function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng()) 
        
+"""
+        function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng())
+
+Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
+"""
+function randomize!(f, lp::SpaceParm, ymws::YMworkspace; curng=CUDA.default_rng())
+
    if ymws.ALG == SU2alg
        @timeit "Randomize SU(2) algebra field" begin
            m = Random.randn(curng, ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
@ -49,31 +55,44 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI
    return nothing
 end

-function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
+function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
+
+    @inbounds begin
+        b = Int64(CUDA.threadIdx().x)
+        r = Int64(CUDA.blockIdx().x)
+        for id in 1:lp.ndim
+            frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
+                                 m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
+                                 m[b,id,7,r], m[b,id,8,r])
+        end
+    end
+
+    return nothing
+end
+
+function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        it = point_time((b,r), lp)

-        if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI))
-            if it == 1
-                for id in 1:lp.ndim-1
-                    frc[b,id,r] = zero(T)
-                end
-                frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
-                                    m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
-                                    m[b,N,7,r], m[b,N,8,r])
-            else
-                for id in 1:lp.ndim
-                    frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
-                                         m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
-                                         m[b,id,7,r], m[b,id,8,r])
-                end
+        if it == 1
+            for id in 1:lp.ndim-1
+                frc[b,id,r] = zero(T)
+            end
+            frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
+                                m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
+                                m[b,N,7,r], m[b,N,8,r])
+        else
+            for id in 1:lp.ndim
+                frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
+                                     m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
+                                     m[b,id,7,r], m[b,id,8,r])
            end
        end
    end
-        
+
    return nothing
 end

--- a/src/YM/YMflow.jl
+++ b/src/YM/YMflow.jl
@ -10,6 +10,11 @@
 ###                               


+"""
+        struct FlowIntr{N,T}
+
+Structure containing info about a particular flow integrator
+"""
 struct FlowIntr{N,T}
    r::T
    e0::NTuple{N,T}
@ -26,11 +31,46 @@ struct FlowIntr{N,T}
 end

 # pre-defined integrators
+"""
+        wfl_euler(::Type{T}, eps::T, tol::T)
+
+Euler scheme integrator for the Wilson Flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`. 
+"""
 wfl_euler(::Type{T}, eps::T, tol::T) where T = FlowIntr{0,T}(one(T),(),(),false,one(T),eps,tol,one(T)/200,one(T)/10,9/10)
+
+"""
+        zfl_euler(::Type{T}, eps::T, tol::T)
+
+Euler scheme integrator for the Zeuthen flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`. 
+"""
 zfl_euler(::Type{T}, eps::T, tol::T) where T = FlowIntr{0,T}(one(T),(),(),true, (one(T)*5)/3,eps,tol,one(T)/200,one(T)/10,9/10)
+
+"""
+        wfl_rk2(::Type{T}, eps::T, tol::T)
+
+Second order Runge-Kutta integrator for the Wilson flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`. 
+"""
 wfl_rk2(::Type{T}, eps::T, tol::T)   where T = FlowIntr{1,T}(one(T)/2,(-one(T)/2,),(one(T),),false,one(T),eps,tol,one(T)/200,one(T)/10,9/10)
+
+"""
+        zfl_rk2(::Type{T}, eps::T, tol::T)
+
+Second order Runge-Kutta integrator for the Zeuthen flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`. 
+"""
 zfl_rk2(::Type{T}, eps::T, tol::T)   where T = FlowIntr{1,T}(one(T)/2,(-one(T)/2,),(one(T),),true, (one(T)*5)/3,eps,tol,one(T)/200,one(T)/10,9/10)
+
+"""
+        wfl_rk3(::Type{T}, eps::T, tol::T)
+
+Third order Runge-Kutta integrator for the Wilson flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`. 
+"""
 wfl_rk3(::Type{T}, eps::T, tol::T)   where T = FlowIntr{2,T}(one(T)/4,(-17/36,-one(T)),(8/9,3/4),false,one(T),eps,tol,one(T)/200,one(T)/10,9/10)
+
+"""
+        Zfl_rk3(::Type{T}, eps::T, tol::T)
+
+Third order Runge-Kutta integrator for the Zeuthen flow. The fixed step size is given by `eps` and the tolerance for the adaptive integrators by `tol`. 
+"""
 zfl_rk3(::Type{T}, eps::T, tol::T)   where T = FlowIntr{2,T}(one(T)/4,(-17/36,-one(T)),(8/9,3/4),true, (one(T)*5)/3,eps,tol,one(T)/200,one(T)/10,9/10)

 function Base.show(io::IO, int::FlowIntr{N,T}) where {N,T}
@ -94,7 +134,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
        r = Int64(CUDA.blockIdx().x)
        it = point_time((b, r), lp)

-        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) 
+        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
+        OBC = (B == BC_OPEN)

        @inbounds for id in 1:N
            bu, ru = up((b,r), id, lp)
@ -112,16 +153,29 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
                    frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
                        projalg(U[b,id,r]*X/U[b,id,r]))
                end
-            else 
+            end
+            if OBC
+                if (it > 1) && (it < lp.iL[end])
+                    frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
+                        projalg(U[b,id,r]*X/U[b,id,r]))
+                elseif ((it == lp.iL[end]) || (it == 1))  && (id < N)
+                    frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
+                        projalg(U[b,id,r]*X/U[b,id,r]))
+                end
+            else
                frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
                    projalg(U[b,id,r]*X/U[b,id,r]))
            end
        end
    end
-    
    return nothing
 end

+"""
+        function flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)
+
+Integrates the flow equations with the integration scheme defined by `int` performing `ns` steps with fixed step size. The configuration `U` is overwritten.   
+"""
 function flw(U, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T}    
    @timeit "Integrating flow equations" begin
        for i in 1:ns
@ -152,21 +206,28 @@ flw(U, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, lp::SpaceParm, ymws::YMwor
 # Adaptive step size integrators
 ##

+"""
+        function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)
+
+Integrates the flow equations with the integration scheme defined by `int` using the adaptive step size integrator up to `tend` with the tolerance defined in `int`. The configuration `U` is overwritten.   
+"""
 function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T}

-    eps = int.eps_ini
+    eps = epsini
    dt  = tend
    nstp = 0
+    eps_all = Vector{T}(undef,0)
    while true
        ns = convert(Int64, floor(dt/eps))
        if ns > 10
            flw(U, int, 9, eps, gp, lp, ymws)
            ymws.U1 .= U
-            flw(U, int, 2, eps/2, gp, lp, ymws)
-            flw(ymws.U1, int, 1, eps, gp, lp, ymws)
+            flw(U, int, 1, eps, gp, lp, ymws)
+            flw(ymws.U1, int, 2, eps/2, gp, lp, ymws)
            
            dt = dt - 10*eps
            nstp = nstp + 10
+            push!(eps_all,ntuple(i->eps,10)...)

            # adjust step size
            ymws.U1 .= ymws.U1 ./ U
@ -177,6 +238,9 @@ function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp
            flw(U, int, ns, eps, gp, lp, ymws)
            dt = dt - ns*eps

+            push!(eps_all,ntuple(i->eps,ns)...)
+            push!(eps_all,dt)
+
            flw(U, int, 1, dt, gp, lp, ymws)
            dt = zero(tend)

@ -188,7 +252,7 @@ function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp
        end
    end

-    return nstp, eps
+    return nstp, eps_all
 end
 flw_adapt(U, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T} = flw_adapt(U, int, tend, int.eps_ini, gp, lp, ymws)

@ -201,7 +265,7 @@ flw_adapt(U, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, lp::SpaceParm, ymws::Y
 """
    function Eoft_plaq([Eslc,] U, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)

-Measure the action density `E(t)` using the plaquette discretization. If the argument `Eslc`
+Measure the action density `E(t)` using the plaquette discretization. If the argument `Eslc` is given
 the contribution for each Euclidean time slice and plane are returned.
 """
 function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws::YMworkspace) where {T,G,NN,N,M,B,D}
@ -209,7 +273,8 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
    @timeit "E(t) plaquette measurement" begin

        ztw = ztwist(gp, lp)
-        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) 
+        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
+        OBC = (B == BC_OPEN)

        tp = ntuple(i->i, N-1)
        V3 = prod(lp.iL[1:end-1])
@ -230,6 +295,10 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
                if !SFBC
                    Eslc[1,ipl] = Etmp[1] + Etmp[end]
                end
+                if OBC ## Check normalization of timelike boundary plaquettes
+                    Eslc[end,ipl] = Etmp[end-1]
+                    Eslc[1,ipl] = Etmp[1]
+                end
            else
                for it in 1:lp.iL[end]
                    Eslc[it,ipl] = 2*Etmp[it]
@ -254,7 +323,7 @@ function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{
        I = point_coord((b,r), lp)
        
        id1, id2 = lp.plidx[ipl]
-        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI)) && (id1 == lp.iL[end]) 
+        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI)) && (id1 == N)
        TWP  = ((I[id1]==1)&&(I[id2]==1))
        
        bu1, ru1 = up((b, r), id1, lp)
@ -272,15 +341,13 @@ function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{
            plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
        end            
    end
-        
    return nothing
 end

 """
-    Qtop([Qslc,] U, lp, ymws)
+    Qtop([Qslc,] U, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)

-Measure the topological charge `Q` of the configuration `U`. If the argument `Qslc` is present
-the contribution for each Euclidean time slice are returned.
+Measure the topological charge `Q` of the configuration `U` using the clover definition of the field strength tensor. If the argument `Qslc` is present the contributions for each Euclidean time slice are returned. Only works in 4D.
 """
 function Qtop(Qslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace) where {M,B,D}

@ -296,21 +363,18 @@ function Qtop(Qslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace)
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
        end
-    
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 2,4, ztw[2], ztw[4], lp)
        end
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, +, ymws.frc1, ymws.frc2, lp)
        end
-    
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 3,6, ztw[3], ztw[6], lp)
        end
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
        end
-        
        Qslc .= reshape(Array(CUDA.reduce(+, ymws.rm; dims=tp)),lp.iL[end])./(32*pi^2)
    end    

@ -322,7 +386,7 @@ Qtop(U, gp::GaugeParm, lp::SpaceParm{4,M,D}, ymws::YMworkspace{T}) where {T,M,D}
 """
    function Eoft_clover([Eslc,] U, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace)

-Measure the action density `E(t)` using the clover discretization. If the argument `Eslc`
+Measure the action density `E(t)` using the clover discretization. If the argument `Eslc` is given
 the contribution for each Euclidean time slice and plane are returned.
 """
 function Eoft_clover(Eslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace{T}) where {T,M,B,D}
@ -391,7 +455,7 @@ function krnl_add_et!(rm, frc1, lp::SpaceParm{4,M,B,D}) where {M,B,D}
        I = point_coord((b,r), lp)
        rm[I] = dot(X1,X1)
    end
-        
+
    return nothing
 end

@ -420,6 +484,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
        #First plane
        id1, id2 = lp.plidx[ipl1]
        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
+        OBC = ((B == BC_OPEN) && (id1 == 4))
        TWP  = ((I[id1]==1)&&(I[id2]==1))
        
        bu1, ru1 = up((b, r), id1, lp)
@ -439,6 +504,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
            frc1[bu1,2,ru1] = zero(TA)
            frc1[bd,3,rd]   = zero(TA)
            frc1[bu2,4,ru2] = projalg(l2*l1)
+        elseif OBC && (it == lp.iL[end])
+            frc1[b,1,r]     = projalg(U[b,id1,r]*l1/U[b,id2,r])
+            frc1[bu1,2,ru1] = zero(TA)
+            frc1[bd,3,rd]   = zero(TA)
+            frc1[bu2,4,ru2] = projalg(l2*l1)
        else
            if TWP
                frc1[b,1,r]     = projalg(ztw1, U[b,id1,r]*l1/U[b,id2,r])
@ -456,6 +526,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
        # Second plane
        id1, id2 = lp.plidx[ipl2]
        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
+        OBC = ((B == BC_OPEN) && (id1 == 4))
        TWP  = ((I[id1]==1)&&(I[id2]==1))

        bu1, ru1 = up((b, r), id1, lp)
@ -475,6 +546,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
            frc2[bu1,2,ru1] = zero(TA)
            frc2[bd,3,rd]   = zero(TA)
            frc2[bu2,4,ru2] = projalg(l2*l1)
+        elseif OBC && (it == lp.iL[end])
+            frc1[b,1,r]     = projalg(U[b,id1,r]*l1/U[b,id2,r])
+            frc1[bu1,2,ru1] = zero(TA)
+            frc1[bd,3,rd]   = zero(TA)
+            frc1[bu2,4,ru2] = projalg(l2*l1)
        else
            if TWP
                frc2[b,1,r]     = projalg(ztw2, U[b,id1,r]*l1/U[b,id2,r])
@ -489,7 +565,5 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
            end
        end
    end
-        
    return nothing
 end
-    
--- a/src/YM/YMhmc.jl
+++ b/src/YM/YMhmc.jl
@ -13,7 +13,7 @@
    
    function gauge_action(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)

-Returns the value of the gauge plaquette action for the configuration U. The parameters `\beta` and `c0` are taken from the `gp` structure. 
+Returns the value of the gauge action for the configuration U. The parameters ``\\beta`` and `c0` are taken from the `gp` structure.
 """
 function gauge_action(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}) where T <: AbstractFloat

@ -37,6 +37,11 @@ function gauge_action(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}) whe
    return S
 end

+"""
+        function plaquette(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
+
+Computes the average plaquette for the configuration `U`.
+"""
 function plaquette(U, lp::SpaceParm{N,M,B,D}, gp::GaugeParm, ymws::YMworkspace) where {N,M,B,D}

    ztw = ztwist(gp, lp)
@ -48,7 +53,12 @@ function plaquette(U, lp::SpaceParm{N,M,B,D}, gp::GaugeParm, ymws::YMworkspace)

    return CUDA.mapreduce(real, +, ymws.cm)/(prod(lp.iL)*lp.npls)
 end
-    
+
+"""
+        function hamiltonian(mom, U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
+
+Returns the Energy ``H = \\frac{p^2}{2}+S[U]``, where the momenta field is given by `mom` and the configuration by `U`.
+"""
 function hamiltonian(mom, U, lp, gp, ymws)
    @timeit "Computing Hamiltonian" begin
        K = CUDA.mapreduce(norm2, +, mom)/2 
@ -58,6 +68,12 @@ function hamiltonian(mom, U, lp, gp, ymws)
    return K+V
 end

+
+"""
+        HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng())
+
+Performs a HMC step (molecular dynamics integration and accept/reject step). The configuration `U` is updated and function returns the energy violation and if the configuration was accepted in a tuple.
+"""
 function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng()) where T

    @timeit "HMC trayectory" begin
@ -92,6 +108,11 @@ function HMC!(U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspac
 end
 HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}; noacc=false, rng=Random.default_rng(), curng=CUDA.default_rng()) where T = HMC!(U, omf4(T, eps, ns), lp, gp, ymws; noacc=noacc, rng, curng)

+"""
+        function MD!(mom, U, int::IntrScheme, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace)
+
+Performs the integration of a molecular dynamics trajectory starting from the momentum field `mom` and the configuration `U` according to the integrator described by `int`.
+"""
 function MD!(mom, U, int::IntrScheme{NI, T}, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace{T}) where {NI, T <: AbstractFloat}
    
    @timeit "MD evolution" begin
--- a/src/YM/YMio.jl
+++ b/src/YM/YMio.jl
@ -75,7 +75,7 @@ function read_cnfg(fname::String)
    end

    if ibc == BC_SF_AFWB || ibc == BC_SF_ORBI
-        BDIO_read(fb, V)
+        BDIO_read(fb, vec(V))
        Ubnd = ntuple(i->assign(i, V, 1), 3)
        BDIO_close!(fb)

@ -297,3 +297,50 @@ function import_cern64(fname, ibc, lp::SpaceParm; log=true)

    return CuArray(Ucpu)
 end
+
+
+
+"""
+    read_gp(fname::String)
+
+Reads Gauge parameters from file `fname` using the native (BDIO) format. Returns GaugeParm and SpaceParm.
+"""
+function read_gp(fname::String)
+
+    UID_HDR = 14
+    fb = BDIO_open(fname, "r")
+    while BDIO_get_uinfo(fb) != UID_HDR
+        BDIO_seek!(fb)
+    end
+    ihdr = Vector{Int32}(undef, 2)
+    BDIO_read(fb, ihdr)
+    if (ihdr[1] != convert(Int32, 1653996111)) && (ihdr[2] != convert(Int32, 2))
+        error("Wrong file format [header]")
+    end
+
+    run = BDIO.BDIO_read_str(fb)
+
+    while BDIO_get_uinfo(fb) != 1
+        BDIO_seek!(fb)
+    end
+
+    ifoo = Vector{Int32}(undef, 4)
+    BDIO_read(fb, ifoo)
+    ndim = convert(Int64, ifoo[1])
+    npls = convert(Int64, round(ndim*(ndim-1)/2))
+    ibc  = convert(Int64, ifoo[2])
+    nf   = ifoo[4]
+    
+    ifoo = Vector{Int32}(undef, ndim+convert(Int32, npls))
+    BDIO_read(fb, ifoo)
+    iL  = ntuple(i -> convert(Int64, ifoo[i]),ndim)
+    ntw = ntuple(i -> convert(Int64, ifoo[i+ndim]), npls)
+
+    dfoo = Vector{Float64}(undef, 4)
+    BDIO_read(fb, dfoo)
+
+    lp = SpaceParm{ndim}(iL, (4,4,4,4), ibc, ntw)
+    gp = GaugeParm{Float64}(SU3{Float64}, dfoo[1], dfoo[2])
+    
+    return gp, lp
+end
--- a/src/YM/YMsf.jl
+++ b/src/YM/YMsf.jl
@ -10,9 +10,9 @@
 ###                               

 """ 
-    sfcoupling(U, lp::SpaceParm{N,M,B,D}, gp::GaugeParm, ymws::YMworkspace) where {N,M,B,D}
+    sfcoupling(U, lp::SpaceParm, gp::GaugeParm, ymws::YMworkspace) 

-Measures the Schrodinger Functional coupling `ds/d\eta` and `d^2S/d\eta d\nu`. 
+Measures the Schrodinger Functional coupling ``{\\rm d}S/{\\rm d}\\eta`` and ``{\\rm d}^2S/{\\rm d}\\eta d\nu``. 
 """
 function sfcoupling(U, lp::SpaceParm{N,M,B,D}, gp::GaugeParm, ymws::YMworkspace) where {N,M,B,D}

@ -89,7 +89,11 @@ end
    return exp(X)
 end
    
+"""
+        function setbndfield(U, phi, lp::SpaceParm)

+Sets abelian boundary fields with phases `phi[1]` and `phi[2]` to the configuration `U` at time salice ``x_0=0``.
+"""
 function setbndfield(U, phi, lp::SpaceParm{N,M,B,D}) where {N,M,B,D}

    CUDA.@sync begin