#
#     Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
#

# Initial value of CONCUR. The -mp, -Mconcur and -nomp switches in this file assign it again.
set CONCUR=YES;

# Name suffixes appended to the host (OMPLIBDEBUG) and device (OMPLIBDEVDEBUG)
# OpenMP library names built further down. Both are empty by default.
variable OMPLIBDEBUG is default();
variable OMPLIBDEVDEBUG is default();
# Indicate whether -mp is passed in explicitly
variable OMPSETEXPLICITLY is default(0);
# Indicate the need for OpenMP offload if -mp is passed and set to gpu.
variable NEEDOMPOFFLOAD is default($if($OMPSETEXPLICITLY, $if($expr($TGTOMP & $TGTGPU), 1, 0), 0));
# Selectors read by the library variables and by the -mp switch:
#   USENVOMPHOSTLIB       gates the host library in NVOMPLIB and NVOMPLIBS
#   USENVOMPDEVSTATICLIB  gates OMPDEVLIB and suppresses the -x 233 0x100 flags in -mp
#   USENVOMPNVCCBITCODE   makes -mp add -x 233 0x400 when the OpenMP target includes the GPU
variable USENVOMPHOSTLIB is default(1);
variable USENVOMPDEVSTATICLIB is default(0);
variable USENVOMPNVCCBITCODE is default(1);
# Suffix that picks the device library variant from CUDAXY:
#   below 113 gives _110, below 118 gives _113, below 120 gives _118,
#   below 127 gives _120, below 129 gives _128, below 130 gives _129.
# The result is empty when CUDAXY is unset or is 130 and above.
# NOTE(review): CUDAXY is presumably major*10+minor of the CUDA toolkit - confirm.
variable NVOMP_DEV_CUDAXY is default($if($CUDAXY,$if($expr($CUDAXY<113),_110,
                                                 $if($expr($CUDAXY<118),_113,
                                                 $if($expr($CUDAXY<120),_118,
                                                 $if($expr($CUDAXY<127),_120,
                                                 $if($expr($CUDAXY<129),_128,
                                                 $if($expr($CUDAXY<130),_129))))))));
# Device library link flags: LIBSW, then the library base name, then the CUDA
# suffix computed above, then the device debug suffix.
variable NVLIBC_DEV is default($(LIBSW)nvlibc_dev$(NVOMP_DEV_CUDAXY)$(OMPLIBDEVDEBUG));
variable HXRT_DEV is default($(LIBSW)hxrt_dev$(NVOMP_DEV_CUDAXY)$(OMPLIBDEVDEBUG));
variable NVOMP_DEV is default($(LIBSW)nvomp_dev$(NVOMP_DEV_CUDAXY)$(OMPLIBDEVDEBUG));
# Device libraries to link. As written this is non-empty only when both
# USENVOMPDEVSTATICLIB and NEEDOMPOFFLOAD are set, and then expands to
# NVOMP_DEV followed by HXRT_DEV.
# NOTE(review): the inner USENVOMPDEVSTATICLIB test is redundant under the outer
# one - confirm whether HXRT_DEV was meant to be added whenever NEEDOMPOFFLOAD is set.
variable OMPDEVLIB is default($if($USENVOMPDEVSTATICLIB,$if($NEEDOMPOFFLOAD,$if($USENVOMPDEVSTATICLIB,$(NVOMP_DEV)) $HXRT_DEV)));
# OMPT state. The ompt keyword of -mp sets NEEDOMPT to 1 and OMPTSUFFIX to _ompt.
variable NEEDOMPT is default(0);
variable OMPTSUFFIX is default();

# NVOMP
# Host runtime link flag: LIBSW, nvomp, the OMPT suffix and the host debug suffix.
# Empty when USENVOMPHOSTLIB is 0.
variable NVOMPLIB is default($if($USENVOMPHOSTLIB,$(LIBSW)nvomp$(OMPTSUFFIX)$(OMPLIBDEBUG)));
variable NVOMPBCLIB is default();
variable NVOMPOBJLIB is default();
# Full host link list. With USENVOMPHOSTLIB set it is NVOMPLIB (left out when
# both NEEDACCLIB and ISSTATIC are set) followed by PGIUNSTATIC and DLLIB.
# Otherwise it falls back to NVOMPOBJLIB.
variable NVOMPLIBS is default($if($USENVOMPHOSTLIB,$if($and($NEEDACCLIB,$ISSTATIC),,$NVOMPLIB) $PGIUNSTATIC $DLLIB,$NVOMPOBJLIB));
# Stub library, requested by the stub keyword of -mp. The _static suffix is
# added when ISSTATIC is set.
variable NEEDOMPSTUBLIB is default(0);
variable NVOMPSTUBLIB is default($if($NEEDOMPSTUBLIB,-lnvompstub$if($ISSTATIC,_static)));

# Generic variable to hold OpenMP library, default to NVOMP RT
variable OMPLIB is default($NVOMPLIBS);

# USEOTHEROMPLIB is set when we want to use another OpenMP RT instead of the default NVOMP RT.
variable USEOTHEROMPLIB is default(0);

# Interoperability OpenACC/OpenMP
# OMPINTEROPSTART is empty by default and OMPINTEROPEND defaults to the host runtime flag.
variable OMPINTEROPSTART is default();
variable OMPINTEROPEND is default($NVOMPLIB);

# Select only CC compatible with OpenMP GPU Offload
# When LNGOMP is set, keeps the entries of COMPUTECAP that are 70 or higher.
variable OMPCOMPUTECAPS is default($if($LNGOMP,$foreach(cc,$COMPUTECAP,$if($expr($cc >= 70),$cc ))));

# Set 1 when `-nomp` is used
variable NOMP is default(0);

# Indicates compilation for OpenMP GPU.
# Non-empty only when -mp was given explicitly (OMPSETEXPLICITLY).
variable OMPGPU is default($if($OMPSETEXPLICITLY, $expr($TGTOMP & $TGTGPU)));
# Indicates compilation for OpenMP multicore.
# Note that since we always compile for multicore, this variable might not be strictly required.
variable OMPCPU is default($if($OMPSETEXPLICITLY, $expr($TGTOMP & $TGTCPU)));

# Predefine __NVCOMPILER_OPENMP_GPU and __NVCOMPILER_OPENMP_MULTICORE according
# to OMPGPU and OMPCPU, once in -D form (USRDDEF) and once in -def form (USRDEFDEF).
append USRDDEF=$if($OMPGPU,-D__NVCOMPILER_OPENMP_GPU) $if($OMPCPU,-D__NVCOMPILER_OPENMP_MULTICORE);
append USRDEFDEF=$if($OMPGPU,-def __NVCOMPILER_OPENMP_GPU) $if($OMPCPU,-def __NVCOMPILER_OPENMP_MULTICORE);

# -mp={gpu|gpu,multicore|multicore}
#   gpu             OpenMP target regions compiled for GPU execution only
#   gpu,multicore   (default) OpenMP target regions compiled for GPU (default) or multicore CPU execution
#   multicore       OpenMP directives are compiled for multicore CPU execution only (default)

# -mp: enable OpenMP compilation.
# The body has three parts:
#   1. settings applied whenever -mp is given (host/multicore flags plus GPU
#      flags gated on the OpenMP target including the GPU, TGTOMP & TGTGPU)
#   2. the keyword list for the sub-options gpu, multicore, stub, ompt,
#      align/noalign and autopar/noautopar
#   3. appends that consume the values computed in parts 1 and 2
switch -mp is
    error($if($ISNVNG,Support for OpenMP is not yet implemented.))
    help(Enable OpenMP (implies -Mrecursive in Fortran))
    helpname(-mp)
    helpgroup(target)
    # Record that OpenMP is on, that -nomp is not in effect and that -mp was explicit
    set(LNGOMP=1)
    set(NOMP=0)
    set(OMPSETEXPLICITLY=1)
    # Multicore (always on in OpenMP)
    set(CONCUR=YES)
    set(CONCURSW=YES)
    append(MPSETDEFCPP=--mp)
    set(CONCURCOMPILE=YES)
    set(MPSETDEF=-mp -x 69 0x200) # MPSET gets this value, unless -Mpfi
    set(DEFNOSGIMP=)
    set(DEFNOOPENMP=)
    set(NOSGIMP=$if($index($CG,llvm),-x 69 2))
    # Library directories and rpath entries: one per COMPLIBSUBDIR entry that has an mp subdirectory
    set(COMPLIBMP=$foreach(dd,$COMPLIBSUBDIR,$if($isdir($COMPBASE/$COMPSYS/$COMPVER/$dd/mp),$COMPBASE/$COMPSYS/$COMPVER/$dd/mp)))
    set(STDRPATHMP=$foreach(dd,$COMPLIBSUBDIR,$if($isdir($COMPBASE/$COMPSYS/$COMPVER/$dd/mp),-rpath $COMPBASE/$COMPSYS/$COMPVER/$dd/mp)))
    set(MPFAIR=-x 69 0x400)
    append(CGARGS=-x 69 0x2000000) # Inhibit invariant hoisting around OMP calls
    append(CPP2ARGS=-x 180 0x4000000)
    append(F902ARGS=-x 180 0x4000000)
    append(FLANG1ARGS=-fopenmp)
    # Multicore (always on in OpenMP)

    # GPU
    # Fatal error unless SYSACCELS contains tesla
    fatal($ifn($contains($SYSACCELS,tesla),Target accelerator -mp=gpu is not supported for $PGSYS-$PGLEN systems))
    max(OPTLEVELINITDEF1=$if($or($TGLOMPGPU,$TGTOMPGPU),2,0))
    append(USRDDEF=$if($expr($TGTOMP & $TGTGPU),-D_PGI_HX))
    append(USRDEFDEF=$if($expr($TGTOMP & $TGTGPU),-def _PGI_HX))
    append(CPP1ARGS=$if($expr($TGTOMP & $TGTGPU),--target_gpu))

    # Disable loop bounds check for both CPU and GPU until it gets fully disabled from the compiler (FS#)
    append(CPP2ARGS=-x 194 0x20000000)
    append(F902ARGS=-x 194 0x20000000)

    # Enable GOMP_task instead of kmpc_omp_task_alloc (FS#30011)
    append(CPP2ARGS=-x 251 0x2)
    append(F902ARGS=-x 251 0x2)

    append(CPP2ARGS=$if($expr($TGTOMP & $TGTGPU),-x 233 0x1 -x 205 2))
    append(F901ARGS=$if($expr($TGTOMP & $TGTGPU),-x 233 0x1))
    append(F902ARGS=$if($expr($TGTOMP & $TGTGPU),-x 233 0x1 -x 205 2))
    # Both branches of this if are empty, so OMPATMFLAGS is cleared
    set(OMPATMFLAGS=$if($expr($TGTOMP & $TGTGPU),))

    # By default enable compilation with LLVM IR NVOMP RT device library (both implicit and explicit offload)
    # Skipped when USENVOMPDEVSTATICLIB is set. The 0x200 flag is added when OMPLIBDEVDEBUG is non-empty.
    append(CPP2ARGS=$if($expr($TGTOMP & $TGTGPU),$ifn($USENVOMPDEVSTATICLIB,$if($NEEDOMPOFFLOAD,-x 233 0x100 $if($OMPLIBDEVDEBUG,-x 233 0x200)))))
    append(F901ARGS=$if($expr($TGTOMP & $TGTGPU),$ifn($USENVOMPDEVSTATICLIB,$if($NEEDOMPOFFLOAD,-x 233 0x100 $if($OMPLIBDEVDEBUG,-x 233 0x200)))))
    append(F902ARGS=$if($expr($TGTOMP & $TGTGPU),$ifn($USENVOMPDEVSTATICLIB,$if($NEEDOMPOFFLOAD,-x 233 0x100 $if($OMPLIBDEVDEBUG,-x 233 0x200)))))

    # Enable compilation with LLVM IR NVOMP RT device library generated with NVCC (both implicit and explicit offload)
    append(CPP2ARGS=$if($expr($TGTOMP & $TGTGPU),$if($USENVOMPNVCCBITCODE,-x 233 0x400)))
    append(F902ARGS=$if($expr($TGTOMP & $TGTGPU),$if($USENVOMPNVCCBITCODE,-x 233 0x400)))

    # GPU compilation
    set(ACCTESLAONLY=$if($and($TGTCUDA,$not($or($LNGSPA,$TA,$LNGACC))),0,$DEFTESLAONLY))
    set(ACCMULTIONLY=$DEFMULTIONLY)
    # Both branches of these ifs are empty, so DEFACC and DEF901ACC are cleared
    set(DEFACC=$if($expr($TGTOMP & $TGTGPU),))
    set(DEF901ACC=$if($expr($TGTOMP & $TGTGPU),))

    append(ACCCGFLAGS=$if($expr($TGTOMP & $TGTGPU),-x 180 0x4000400 -x 121 0xc00))
    append(ACCCGFLAGS=$if($expr($TGTOMP & $TGTGPU),$PADDFLAG))
    append(ACCFEFLAGS=$if($expr($TGTOMP & $TGTGPU),$PADDFLAG))

    append(OPTLEVELMINLIMIT=$if($expr($TGTOMP & $TGTGPU)," -ta=tesla"))
    append(ACCCGFLAGS=$if($expr($TGTOMP & $TGTGPU),-x 163 0x1 -x 186 0x80000 $TOOLKITFLAG $ACCCACHE -x 194 0x40000))
    append(ACCELS=$if($expr($TGTOMP & $TGTGPU),tesla))
    # Normalize the need flags to 1 or 0
    set(NEEDACCLIB=$if($ISACCLIBNEEDED,1,0))
    set(CUDARPATHNEEDED=$if($ISCUDARTNEEDED,1,0))
    set(CHECKCUDALIB=$if($ISCUDARTNEEDED,1,0))

    # GPU

    # OMPLIB is NVOMPLIBS when CG contains llvm, empty otherwise
    set(OMPLIB=$if($index($CG,llvm),$NVOMPLIBS))

    keyword(
        # GPU Options
        gpu(
            ifn($index($TARGET,win64-llvm))
            help(OpenMP target directives are compiled for GPU execution; please refer to -gpu for target specific options)
            set(NEEDOMPOFFLOAD=1)
            set(TGLOMP=1)
            set(TGLOMPGPU=$TGTGPU)
            set(TGLOMPCPU=$TGTCPU)
            set(TGTOMP=$expr($TGLOMPCPU | $TGLOMPGPU))
            set(NEEDLOCSCRIPT=1)
            # May need managed memory (i.e. omp requires)
            set(MAYNEEDMANAGEDMEMORY=1)
        )
        # GPU Options
        # Multicore Options
        multicore(
            help(OpenMP directives are compiled for multicore CPU execution only (default))
            set(TGLOMP=1)
            set(TGLOMPCPU=$TGTCPU)
            set(TGTOMP=$expr($TGLOMPCPU | $TGLOMPGPU))
        )
        stub(hide
            help(Link in the OpenMP stub library)
            set(NEEDOMPSTUBLIB=1)
        )
        ompt(
            help(Link against OMPT-enabled OpenMP library)
            set(NEEDOMPT=1)
            # Support for exposing `ompt_start_tool` in statically
            # linked application, needed for OMPT support
            set(NEEDNVHPCLDSYMS=1)
            set(OMPTSUFFIX=_ompt)
            append(CGARGS=$ifn($expr($TGTACC & $TGTCPU),-x 251 1))
        )
        align(
            helpname([no]align)
            help(Modify default loop scheduling to prefer aligned array references)
            # Request aligned scheduling only when the OpenMP target includes the CPU.
            # CGARGS is gated the same way as F901ARGS here and as both lists in noalign.
            append(CGARGS=$if($expr($TGTOMP & $TGTCPU),-x 69 4))
            append(F901ARGS=$if($expr($TGTOMP & $TGTCPU),-x 69 4))
            # Both branches of this if are empty, so MPFAIR is cleared
            set(MPFAIR=$if($expr($TGTOMP & $TGTCPU),))
        )
        noalign(hide
            help(Default loop scheduling will not prefer aligned array references)
            append(CGARGS=$if($expr($TGTOMP & $TGTCPU),-y 69 4))
            append(F901ARGS=$if($expr($TGTOMP & $TGTCPU),-y 69 4))
        )
        autopar(
            ifn($index($TARGET,win64-llvm))
            helpname([no]autopar)
            help(Enable (default) or disable loop autoparallelization within omp loop)
            set(ACCAUTOPAR=$if($expr($TGTACC & $TGTALL),1))
        )
        noautopar(hide
            ifn($index($TARGET,win64-llvm))
            set(ACCAUTOPAR=$if($expr($TGTACC & $TGTALL),0))
        )
        # The remaining keywords are accepted but carry no actions here
        allcores(hide)
        bind(hide)
        numa(hide)
        nonuma(hide)
        residual(hide)
        trace(hide)
        # Multicore Options
    )
    nokeyword()

    # Multicore
    set(MPDEFCPPDEF=-D_OPENMP=202011)
    append(CGARGS=$MPSET $MPFAIR $NOSGIMP $NOOPENMP)
    append(F901ARGS=$MPSET $MPFAIR $NOSGIMP $NOOPENMP)
    append(CPP1ARGS=$MPSETCPP $MPDEFCPPDEF)
    append(CARGS=$if($index($CG,llvm),$OMPATMFLAGS))
    append(F901ARGS=$if($index($CG,llvm),$OMPATMFLAGS))
    append(F902ARGS=$if($index($CG,llvm),$OMPATMFLAGS))
    # Multicore

    # GPU
    append(ACCCGFLAGS=$if($expr($TGTOMP & $TGTGPU),$DEFAULTCAPFLAG));
    # GPU

# -Mconcur: generate parallel loops (auto-parallelization).
# Selects the OpenMP runtime and its library paths, handles the tuning
# sub-options in the keyword list, raises the optimization level to at least 2
# and passes -concur to the F901 and CG phases.
switch -Mconcur is
    help(Generate parallel loops)
    helpgroup(target)
    # cyclic is added to 512 in the -concur argument at the end of this switch
    set(cyclic=0)
    # OMPLIB is NVOMPLIBS when CG contains llvm, empty otherwise
    set(OMPLIB=$if($index($CG,llvm),$NVOMPLIBS))
    # Library directories and rpath entries: one per COMPLIBSUBDIR entry that has an mp subdirectory
    set(COMPLIBMP=$foreach(dd,$COMPLIBSUBDIR,$if($isdir($COMPBASE/$COMPSYS/$COMPVER/$dd/mp),$COMPBASE/$COMPSYS/$COMPVER/$dd/mp)))
    set(STDRPATHMP=$foreach(dd,$COMPLIBSUBDIR,$if($isdir($COMPBASE/$COMPSYS/$COMPVER/$dd/mp),-rpath $COMPBASE/$COMPSYS/$COMPVER/$dd/mp)))
    keyword(
        allcores(hide)
        bind(hide)
        altcode:n(
            helpname([no]altcode:<n>)
            help(Execute alternate serial code if loop count is n or less)
            append(CGARGS=-x 44 $n)
        )
        altreduction:n(
            append(CGARGS=-x 43 $n)
            help(Execute alternate serial code if loop has a reduction \\n and count is n or less)
        )
        assoc(
            helpname([no]assoc)
            help(Enable parallelization of loops with associative reductions)
        )
        cncall(
            helpname([no]cncall)
            append(CGARGS=-x 42 4)
            help(Assume loops containing calls are safe to parallelize)
        )
        dist(hide
            keyword(
                block()
                cyclic()
            )
        )
        innermost(
            helpname([no]innermost)
            help(Enable parallelization of innermost loops)
            append(CGARGS=-x 34 0x1000)
        )
        noinnermost(hide)
        levels:n(
            append(CGARGS=-x 30 $n)
            help(Parallelize loops nested at most n levels deep)
        )
        # noaltcode zeroes both thresholds set by altcode and altreduction
        noaltcode(hide
            append(CGARGS=-x 43 0 -x 44 0)
        )
        noassoc(hide
            append(CGARGS=-vect 4 -x 42 0x400000)
        )
        # nocncall clears the bit that cncall sets
        nocncall(hide
            append(CGARGS=-y 42 4)
        )
        numa(hide)
        nonuma(hide)
    )
    nokeyword()
    set(CONCUR=YES)
    set(CONCURSW=YES)
    set(CONCURCOMPILE=YES)
    # Raise the default and minimum optimization levels to at least 2
    max(OPTLEVELDEF=2)
    max(OPTLEVELMIN=2)
    # COMPLIBMP and STDRPATHMP are already set at the top of this switch, so
    # the identical assignments are not repeated here.
    append(OPTLEVELMINLIMIT=" -Mconcur")
    append(F901ARGS=-concur $add(512,$cyclic))
    append(CGARGS=-concur $add(512,$cyclic));

# -nomp: turn OpenMP off. Marks the OpenMP targets as disabled, sets NOMP and
# clears the variables that the -mp switch fills in.
switch -nomp is
    help(Disable OpenMP directives and do not link with OpenMP libraries)
    helpname(-nomp)
    helpgroup(target)

    # Disable both OpenMP host and device
    set(TGLOMP=1)
    set(TGTOMPGPU=0)
    set(TGTOMPCPU=0)
    set(NOMP=1)

    # NOTE(review): CONCUR stays YES here while CONCURSW and CONCURCOMPILE are
    # cleared - confirm this is intended.
    set(CONCUR=YES)
    set(CONCURSW=)
    set(CONCURCOMPILE=)
    # Clear the OpenMP flag, library path and rpath variables
    set(MPSETDEF=)
    set(MPSETDEFCPP=)
    set(COMPLIBMP=)
    set(STDRPATHMP=)
    set(MPFAIR=)
    set(OMPLINKFILE=)
    # OMPATMFLAGS becomes -x 69 0x1000 when CG contains llvm, empty otherwise
    set(OMPATMFLAGS=$if($index($CG,llvm),-x 69 0x1000))
    set(MPDEFCPP=);

# Alias `-fopenmp` to `-mp` for compatibility purposes with other compilers.
# The switch is declared with hide and carries no actions of its own.
switch -fopenmp is hide shorthand(-mp);
