Difference between revisions of "CP2K compiling with Intel"
(→CP2K) |
(→CP2K basic openmpi) |
||
(3 intermediate revisions by the same user not shown) | |||
Line 99: | Line 99: | ||
==== CP2K ==== | ==== CP2K ==== | ||
− | |||
<pre> | <pre> | ||
# Tested with: intel 19.1.1.217, OPENMPICH 4.0.3, OpenBLAS 0.3.0, ScaLAPACK 2.1.0, FFTW 3.3.8, Libint 2.6.0, Libxc 4.3.4, libxsmm 1.15, SPGLIB 1.12.2 | # Tested with: intel 19.1.1.217, OPENMPICH 4.0.3, OpenBLAS 0.3.0, ScaLAPACK 2.1.0, FFTW 3.3.8, Libint 2.6.0, Libxc 4.3.4, libxsmm 1.15, SPGLIB 1.12.2 | ||
Line 134: | Line 133: | ||
</pre> | </pre> | ||
− | |||
Before running the test, raise the stack-limit to avoid segmentation faults. | Before running the test, raise the stack-limit to avoid segmentation faults. | ||
Line 173: | Line 171: | ||
==== CP2K basic openmpi ==== | ==== CP2K basic openmpi ==== | ||
− | < | + | <pre> |
− | # Tested with: | + | # Tested with: Intel 19.1.1.217, OPENMPICH 4.0.3, OpenBLAS 0.3.0, ScaLAPACK 2.1.0, FFTW 3.3.8, Libint 2.6.0, Libxc 4.3.4, libxsmm 1.15, |
− | # Author: David Dubbeldam (d.dubbeldam@uva.nl, University of Amsterdam, May 2020) | + | # ELPA 2019.11.001, PLUMED 2.6.0, SPGLIB 1.12.2, QUIP-2015, ParMETIS-4.0.3, Pexsi-1.2.0, SIRIUS 6.5.3, SpFFT 0.9.9, libvdw 0.4.0 |
− | + | # Author: David Dubbeldam (d.dubbeldam@uva.nl, University of Amsterdam, May 2020) | |
− | CC | + | |
− | FC | + | CC = /usr/lib64/openmpi4-intel/bin/mpicc |
− | LD | + | FC = /usr/lib64/openmpi4-intel/bin/mpifort |
− | AR | + | LD = /usr/lib64/openmpi4-intel/bin/mpifort |
− | + | AR = ar -r | |
− | CFLAGS | + | |
− | + | CFLAGS = $(DFLAGS) -O2 -xcore-avx2 -fp-model precise -I/opt/include | |
− | DFLAGS | + | |
− | + | DFLAGS = -D__MPI_VERSION=4 -D__parallel -D__SCALAPACK | |
− | FCFLAGS | + | DFLAGS += -D__MKL -D__FFTW3 -D__LIBXC -D__LIBINT -D__LIBXSMM -D__SPGLIB |
− | FCFLAGS | + | DFLAGS += -D__SIRIUS -D__PLUMED2 -D__QUIP -D__LIBPEXSI -D__ELPA |
− | FCFLAGS | + | |
− | + | FCFLAGS = $(DFLAGS) -O2 -funroll-loops -fpp -free -xcore-avx2 -I/opt/include | |
− | FCFLAGS | + | FCFLAGS += -fp-model precise |
− | + | FCFLAGS += -I${MKLROOT}/include -I${MKLROOT}/include/fftw | |
− | LDFLAGS = $(FCFLAGS) -static-intel | + | FCFLAGS += -I/opt/include -I/opt/include/sirius/ -I/opt/lib/libint2-intel-max6/include |
− | LDFLAGS_C = $(FCFLAGS) -static-intel -nofor_main | + | FCFLAGS += -I/opt/include/elpa-2019.11.001/elpa -I/opt/include/elpa-2019.11.001/modules |
− | + | ||
− | INTEL_MKL_LIB = ${MKLROOT}/lib/intel64 | + | LDFLAGS = $(FCFLAGS) -static-intel |
− | + | LDFLAGS_C = $(FCFLAGS) -static-intel -nofor_main | |
− | LIBS | + | |
− | LIBS | + | INTEL_MKL_LIB = ${MKLROOT}/lib/intel64 |
− | LIBS | + | |
− | LIBS | + | LIBS = $(PLUMED_DEPENDENCIES) -lplumed |
− | LIBS | + | LIBS += /opt/lib/libint2-intel-max6/lib/libint2.a |
− | + | LIBS += /opt/lib/libelpa.a | |
− | + | LIBS += /opt/lib64/libsirius.a | |
− | + | LIBS += /opt/lib64/libpexsi.a | |
− | LIBS | + | LIBS += /opt/lib64/libsuperlu_dist.a |
− | + | LIBS += /opt/lib/libspfft.a | |
+ | LIBS += /opt/lib/libvdwxc.a | ||
+ | LIBS += /opt/lib/libquip_core.a /opt/lib/libatoms.a | ||
+ | LIBS += /opt/lib/libFoX_sax.a | ||
+ | LIBS += /opt/lib/libFoX_common.a | ||
+ | LIBS += /opt/lib/libFoX_utils.a | ||
+ | LIBS += /opt/lib/libFoX_fsys.a | ||
+ | LIBS += /opt/lib/libparmetis.a /opt/lib/libmetis.a | ||
+ | LIBS += /opt/lib/libgsl.a /opt/lib/libgslcblas.a | ||
+ | LIBS += /opt/lib/libxcf03.a /opt/lib/libxc.a | ||
+ | LIBS += /opt/lib/libxsmmf.a /opt/lib/libxsmm.a | ||
+ | LIBS += $(MKLROOT)/interfaces/fftw3xf/libfftw3xf_intel.a \ | ||
+ | -Wl,--start-group \ | ||
+ | -lmkl_scalapack_lp64 \ | ||
+ | -lmkl_blacs_openmpi_lp64 \ | ||
+ | -lmkl_intel_lp64 \ | ||
+ | -lmkl_sequential \ | ||
+ | -lmkl_core \ | ||
+ | -Wl,--end-group | ||
+ | LIBS += -lnetcdf -lhdf5_hl -lhdf5 -lz -lsymspg -ldl -liomp5 -lpthread -lstdc++ -lmpi_mpifh -lmpi_cxx -lmpi | ||
+ | </pre> | ||
+ | |||
+ | Prepare for the regression tests | ||
+ | module load mpi/openmpi4-x86_64-intel | ||
+ | ulimit -c unlimited | ||
+ | export OMPI_ALLOW_RUN_AS_ROOT=1 | ||
+ | export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 | ||
+ | export OMPI_MCA_btl_openib_allow_ib=1 | ||
+ | export OMPI_MCA_btl_vader_single_copy_mechanism=none | ||
+ | export OMPI_MCA_btl=openib,self,vader | ||
+ | export OMPI_MCA_pml=^ucx | ||
+ | export FI_PROVIDER=ofi_rxm | ||
+ | export FI_PROVIDER="verbs;ofi_rxm" | ||
+ | |||
Latest revision as of 14:12, 11 June 2020
Compiling
Contents
Compiling subpackages
Serial
preparation
source /opt/intel/compilers_and_libraries_2020/linux/bin/compilervars.sh intel64 source /opt/intel/compilers_and_libraries_2020/linux/mkl/bin/mklvars.sh intel64 export AR="xiar" export FC="ifort" export CC="icc" export CXX="icpc" export F77=${FC} export F90=${FC}
Gsl
Optimization was reduced to "-O1", but with the Intel compiler the linalg unit test still fails.
wget http://gnu.askapache.com/gsl/gsl-2.6.tar.gz tar -zxvf gsl-2.6.tar.gz ./configure --prefix=/usr/local CC=icc FC=ifort CXX=icpc FCFLAGS="-O1 -xcore-avx2" CFLAGS="-O1 -xcore-avx2" CXXFLAGS="-O1 -xcore-avx2" --enable-shared=no make -j 16 make check
Libint2
Reduced optimization to "-O2"
wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-4.tgz wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-5.tgz wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-6.tgz wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-7.tgz tar -zxvf libint-v2.6.0-cp2k-lmax-5.tgz cd libint-v2.6.0-cp2k-lmax-5 ./configure --prefix=/usr/local CC=icc FC=ifort CXX=icpc FCFLAGS="-O2 -xcore-avx2" CFLAGS="-O2 -xcore-avx2" CXXFLAGS="-O2 -xcore-avx2" --enable-shared=no --enable-fortran --with-cxx-optflags="-O2 -xcore-avx2" make -j 24 make -j 24 check
all tests ok
Edit the Makefile in the "fortran" directory. Line 12 should read as follows (-lstdc++ added):
COMPUTE_LIB = -L../lib -lint2 -lstdc++
On line 37, change CXX to FC:
$(LTLINK) $(FC) $(CXXFLAGS) $(LDFLAGS) $(COMPUTE_LIB) $(SYSLIBS) $(FCLIBS) -o $@ $^ sudo make install sudo make install_fortran
/usr/local/lib/libint2.a
Libxsmm
A library that provides high-performance primitives for matrix multiplication and deep learning.
wget https://www.cp2k.org/static/downloads/libxsmm-1.15.tar.gz tar -zxvf libxsmm-1.15.tar.gz sudo make PREFIX=/usr/local FC=/opt/intel/compilers_and_libraries_2020.1.217/linux/bin/intel64/ifort CC=/opt/intel/compilers_and_libraries_2020.1.217/linux/bin/intel64/icc CXX=/opt/intel/compilers_and_libraries_2020.1.217/linux/bin/intel64/icpc FCFLAGS="-O2 -xcore-avx2" CFLAGS="-O2 -xcore-avx2" CXXFLAGS="-O2 -xcore-avx2" STATIC=1 NO_SHARED=1 MALLOC=0 -j 24 install
All 16 tests okay.
make PREFIX=/usr/local STATIC=1 NO_SHARED=1 install
/usr/local/lib/libxsmm.a
/usr/local/lib/libxsmmext.a
/usr/local/lib/libxsmmf.a
/usr/local/lib/libxsmmgen.a
/usr/local/lib/libxsmmnoblas.a
Libxc
wget https://www.cp2k.org/static/downloads/libxc-4.3.4.tar.gz tar -zxvf libxc-4.3.4.tar.gz cd libxc-4.3.4 ./configure --prefix=/usr/local CC=icc FC=ifort CXX=icpc FCFLAGS="-O2 -xcore-avx2" CFLAGS="-O2 -xcore-avx2" CXXFLAGS="-O2 -xcore-avx2" --enable-shared=no --enable-static=yes --enable-fortran make -j 24 make check
PASS: xc-run_testsuite
============================================================================
Testsuite summary for libxc 4.3.4
============================================================================
# TOTAL: 1
# PASS: 1
sudo make install sudo cp src/*.mod /usr/local/include/
/usr/local/lib/libxc.a
/usr/local/lib/libxcf03.a
/usr/local/lib/libxcf90.a
CP2K
# Tested with: intel 19.1.1.217, OPENMPICH 4.0.3, OpenBLAS 0.3.0, ScaLAPACK 2.1.0, FFTW 3.3.8, Libint 2.6.0, Libxc 4.3.4, libxsmm 1.15, SPGLIB 1.12.2 # Author: David Dubbeldam (d.dubbeldam@uva.nl, University of Amsterdam, May 2020) CC = icc FC = ifort LD = ifort AR = ar -r CFLAGS = $(DFLAGS) -O2 -xcore-avx2 -fp-model precise -I/opt/include DFLAGS = -D__MKL -D__FFTW3 -D__LIBXC -D__LIBINT -D__LIBXSMM -D__SPGLIB FCFLAGS = $(DFLAGS) -O2 -funroll-loops -fpp -free -xcore-avx2 -I/opt/include FCFLAGS += -fp-model precise FCFLAGS += -I${MKLROOT}/include -I${MKLROOT}/include/fftw FCFLAGS += -I/opt/include -I/opt/lib/libint2-intel-max6/include LDFLAGS = $(FCFLAGS) -static-intel LDFLAGS_C = $(FCFLAGS) -static-intel -nofor_main INTEL_MKL_LIB = ${MKLROOT}/lib/intel64 LIBS = /opt/lib/libgsl.a /opt/lib/libgslcblas.a LIBS += /opt/lib/libxcf03.a /opt/lib/libxc.a LIBS += /opt/lib/libxsmmf.a /opt/lib/libxsmm.a LIBS += /opt/lib/libint2-intel-max6/lib/libint2.a LIBS += -Wl,--start-group \ $(MKLROOT)/interfaces/fftw3xf/libfftw3xf_intel.a \ -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_lapack95_lp64 -lmkl_blas95_lp64 \ -Wl,--end-group -lpthread -lm LIBS += -lz -lsymspg -ldl -lpthread -lstdc++
Before running the regression tests, raise the stack limit to avoid segmentation faults.
ulimit -s unlimited
Summary --------------------------------
Number of FAILED tests 0
Number of WRONG tests 4
Number of CORRECT tests 3197
Number of NEW tests 3
Total number of tests 3204
GREPME 0 4 3197 3 3204 X
Summary: correct: 3197 / 3204; new: 3; wrong: 4; 50min
Status: FAILED
MPI
preparation
source /opt/intel/compilers_and_libraries_2020/linux/bin/compilervars.sh intel64 source /opt/intel/compilers_and_libraries_2020/linux/mkl/bin/mklvars.sh intel64 export AR="xiar" export FC="/usr/lib64/openmpi4-intel/bin/mpifort" export CC="/usr/lib64/openmpi4-intel/bin/mpicc" export CXX="/usr/lib64/openmpi4-intel/bin/mpicxx" export F77=${FC} export F90=${FC} module load mpi/openmpi4-x86_64-intel export OMPI_MCA_pml=^ucx export OMPI_MCA_btl=self,tcp export OMPI_MCA_btl_openib_allow_ib=1 export OMPI_MCA_btl_vader_single_copy_mechanism=none ulimit -s unlimited
CP2K basic openmpi
# Tested with: Intel 19.1.1.217, OPENMPICH 4.0.3, OpenBLAS 0.3.0, ScaLAPACK 2.1.0, FFTW 3.3.8, Libint 2.6.0, Libxc 4.3.4, libxsmm 1.15, # ELPA 2019.11.001, PLUMED 2.6.0, SPGLIB 1.12.2, QUIP-2015, ParMETIS-4.0.3, Pexsi-1.2.0, SIRIUS 6.5.3, SpFFT 0.9.9, libvdw 0.4.0 # Author: David Dubbeldam (d.dubbeldam@uva.nl, University of Amsterdam, May 2020) CC = /usr/lib64/openmpi4-intel/bin/mpicc FC = /usr/lib64/openmpi4-intel/bin/mpifort LD = /usr/lib64/openmpi4-intel/bin/mpifort AR = ar -r CFLAGS = $(DFLAGS) -O2 -xcore-avx2 -fp-model precise -I/opt/include DFLAGS = -D__MPI_VERSION=4 -D__parallel -D__SCALAPACK DFLAGS += -D__MKL -D__FFTW3 -D__LIBXC -D__LIBINT -D__LIBXSMM -D__SPGLIB DFLAGS += -D__SIRIUS -D__PLUMED2 -D__QUIP -D__LIBPEXSI -D__ELPA FCFLAGS = $(DFLAGS) -O2 -funroll-loops -fpp -free -xcore-avx2 -I/opt/include FCFLAGS += -fp-model precise FCFLAGS += -I${MKLROOT}/include -I${MKLROOT}/include/fftw FCFLAGS += -I/opt/include -I/opt/include/sirius/ -I/opt/lib/libint2-intel-max6/include FCFLAGS += -I/opt/include/elpa-2019.11.001/elpa -I/opt/include/elpa-2019.11.001/modules LDFLAGS = $(FCFLAGS) -static-intel LDFLAGS_C = $(FCFLAGS) -static-intel -nofor_main INTEL_MKL_LIB = ${MKLROOT}/lib/intel64 LIBS = $(PLUMED_DEPENDENCIES) -lplumed LIBS += /opt/lib/libint2-intel-max6/lib/libint2.a LIBS += /opt/lib/libelpa.a LIBS += /opt/lib64/libsirius.a LIBS += /opt/lib64/libpexsi.a LIBS += /opt/lib64/libsuperlu_dist.a LIBS += /opt/lib/libspfft.a LIBS += /opt/lib/libvdwxc.a LIBS += /opt/lib/libquip_core.a /opt/lib/libatoms.a LIBS += /opt/lib/libFoX_sax.a LIBS += /opt/lib/libFoX_common.a LIBS += /opt/lib/libFoX_utils.a LIBS += /opt/lib/libFoX_fsys.a LIBS += /opt/lib/libparmetis.a /opt/lib/libmetis.a LIBS += /opt/lib/libgsl.a /opt/lib/libgslcblas.a LIBS += /opt/lib/libxcf03.a /opt/lib/libxc.a LIBS += /opt/lib/libxsmmf.a /opt/lib/libxsmm.a LIBS += $(MKLROOT)/interfaces/fftw3xf/libfftw3xf_intel.a \ -Wl,--start-group \ -lmkl_scalapack_lp64 \ -lmkl_blacs_openmpi_lp64 \ -lmkl_intel_lp64 \ 
-lmkl_sequential \ -lmkl_core \ -Wl,--end-group LIBS += -lnetcdf -lhdf5_hl -lhdf5 -lz -lsymspg -ldl -liomp5 -lpthread -lstdc++ -lmpi_mpifh -lmpi_cxx -lmpi
Prepare for the regression tests
module load mpi/openmpi4-x86_64-intel ulimit -c unlimited export OMPI_ALLOW_RUN_AS_ROOT=1 export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 export OMPI_MCA_btl_openib_allow_ib=1 export OMPI_MCA_btl_vader_single_copy_mechanism=none export OMPI_MCA_btl=openib,self,vader export OMPI_MCA_pml=^ucx export FI_PROVIDER=ofi_rxm export FI_PROVIDER="verbs;ofi_rxm"
Summary --------------------------------
Number of FAILED tests 0
Number of WRONG tests 3
Number of CORRECT tests 3198
Number of NEW tests 3
Total number of tests 3204
GREPME 0 3 3198 3 3204 X
Summary: correct: 3198 / 3204; new: 3; wrong: 3; 8min
Status: FAILED