CP2K compiling with GFortran

From iRASPA
Jump to: navigation, search

Compiling

Caveats

  1. gcc version 4.8.5 mostly works, but fails for the PLUMED part of CP2K (PLUMED itself compiles fine).
  2. gcc version 9.1.1 fails (it compiles, but many CP2K unit tests fail, even with -O2 -g -mtune=generic).
  3. gcc version 8.3.1 works, but sometimes the optimization level needs to be reduced (e.g. for GSL).

Compiling subpackages

The following assumes that CP2K will be built from static libraries installed in /usr/local using devtoolset-8 on Rocks (CentOS 7.4).

Serial

preparation

scl enable devtoolset-8 bash
export CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
export CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
export F77=/opt/rh/devtoolset-8/root/usr/bin/gfortran
export FC=/opt/rh/devtoolset-8/root/usr/bin/gfortran

Blas and Lapack

wget https://github.com/Reference-LAPACK/lapack/archive/v3.9.0.tar.gz
mv v3.9.0.tar.gz lapack-3.9.0.tar.gz
tar -zxvf lapack-3.9.0.tar.gz
cd lapack-3.9.0
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_Fortran_COMPILER=gfortran -DBUILD_SHARED_LIBS=OFF -DBUILD_TESTING=ON -DCMAKE_Fortran_FLAGS="-O3 -march=native -mno-avx512f"
make -j 16
make test

Result: 100% tests passed, 0 tests failed out of 103

sudo make install

/usr/local/lib64/libblas.a
/usr/local/lib64/liblapack.a
/usr/local/lib64/libtmglib.a

OpenBLAS

OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.

wget http://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz
mv v0.3.9.tar.gz OpenBLAS-0.3.9.tar.gz
tar -zxvf OpenBLAS-0.3.9.tar.gz
cd OpenBLAS-0.3.9
make CC=gcc FC=gfortran CXX=g++ FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" CXXFLAGS="-O3 -march=native -mno-avx512f" USE_THREAD=0 USE_OPENMP=0 NO_SHARED=1 PREFIX=/usr/local NO_AVX512=1
make tests

all tests passed

sudo make PREFIX=/usr/local NO_SHARED=1 install

/usr/local/lib/libopenblas.a

fftw

tar -zxvf fftw-3.3.8.tar.gz
cd fftw-3.3.8
./configure CC=gcc CXX=g++ F77=gfortran FC=gfortran FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" --prefix=/usr/local --disable-shared --enable-static
make -j 16
make check

FFTW transforms passed basic tests!

sudo make install

The serial version is created

/usr/local/lib/libfftw3.a

Gsl

Reduced optimization to "-O1"

wget http://gnu.askapache.com/gsl/gsl-2.6.tar.gz
tar -zxvf gsl-2.6.tar.gz
./configure --prefix=/usr/local CC=gcc FC=gfortran CXX=g++ FCFLAGS="-O1 -march=native -mno-avx512f" CFLAGS="-O1 -march=native -mno-avx512f" CXXFLAGS="-O1 -march=native -mno-avx512f" --enable-shared=no
make -j 16
make check

============================================================================
Testsuite summary for gsl 2.6
============================================================================
all checks passed

sudo make install

/usr/local/lib/libgslcblas.a
/usr/local/lib/libgsl.a

Libint2

Reduced optimization to "-O2"

wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-4.tgz
wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-5.tgz
wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-6.tgz
wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-7.tgz
tar -zxvf libint-v2.6.0-cp2k-lmax-5.tgz
cd libint-v2.6.0-cp2k-lmax-5
./configure --prefix=/usr/local CC=gcc FC=gfortran CXX=g++ FCFLAGS="-O2 -march=native -mno-avx512f" CFLAGS="-O2 -march=native -mno-avx512f" CXXFLAGS="-O2 -march=native -mno-avx512f" --enable-shared=no --enable-fortran --with-cxx-optflags="-O2 -march=native -mno-avx512f"
make -j 24
make -j 24 check

all tests ok

sudo make install
sudo make install_fortran

/usr/local/lib/libint2.a

Libxsmm

A library for improved performance for matrix multiplication and deep learning primitives

wget https://www.cp2k.org/static/downloads/libxsmm-1.15.tar.gz
tar -zxvf libxsmm-1.15.tar.gz
make PREFIX=/usr/local FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" STATIC=1 NO_SHARED=1 MALLOC=0  -j 24

all 16 test okay

make PREFIX=/usr/local STATIC=1 NO_SHARED=1 install

/usr/local/lib/libxsmm.a
/usr/local/lib/libxsmmext.a
/usr/local/lib/libxsmmf.a
/usr/local/lib/libxsmmgen.a
/usr/local/lib/libxsmmnoblas.a

libvdwxc

Libvdwxc is a general library for evaluating energy and potential for exchange-correlation (XC) functionals from the vdW-DF.

wget https://www.cp2k.org/static/downloads/libvdwxc-0.4.0.tar.gz
tar -zxvf libvdwxc-0.4.0.tar.gz
./configure CC=gcc FC=gfortran FCFLAGS="-O3 -march=native -mno-avx512f" FFTW3_INCLUDES="-I/usr/local/include/" FFTW3_LIBS="/usr/local/lib/libfftw3.a" CFLAGS="-O3 -march=native -mno-avx512f"  --prefix=/usr/local --enable-shared=no --enable-static=yes
make -j 24
make check
sudo make install

/usr/local/lib/libvdwxcfort.a
/usr/local/lib/libvdwxc.a

Libxc

wget https://www.cp2k.org/static/downloads/libxc-4.3.4.tar.gz
tar -zxvf libxc-4.3.4.tar.gz
cd libxc-4.3.4
./configure --prefix=/usr/local CC=gcc FC=gfortran CXX=g++ FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" CXXFLAGS="-O3 -march=native -mno-avx512f" --enable-shared=no --enable-static=yes --enable-fortran
make -j 24
make check

PASS: xc-run_testsuite
============================================================================
Testsuite summary for libxc 4.3.4
============================================================================

# TOTAL: 1
# PASS: 1

sudo make install
sudo cp src/*.mod /usr/local/include/

/usr/local/lib/libxc.a
/usr/local/lib/libxcf03.a
/usr/local/lib/libxcf90.a

Spglib

A library for finding and handling crystal symmetries

wget https://github.com/spglib/spglib/archive/v1.11.2.1.tar.gz
mv v1.11.2.1.tar.gz spglib-v1.11.2.1.tar.gz
tar -zxvf spglib-v1.11.2.1.tar.gz
cd spglib-1.11.2.1
mkdir build
cd build
cmake .. -DCMAKE_FIND_ROOT_PATH="$ROOTDIR" -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF
sudo make install

/usr/local/lib/libsymspg.a

Rocks/Centos/RH 7 provides precompiled packages

wget https://download-ib01.fedoraproject.org/pub/epel/7/x86_64/Packages/s/spglib-1.11.1-2.el7.x86_64.rpm
wget https://download-ib01.fedoraproject.org/pub/epel/7/x86_64/Packages/s/spglib-devel-1.11.1-2.el7.x86_64.rpm
sudo rpm -ivh spglib-1.11.1-2.el7.x86_64.rpm spglib-devel-1.11.1-2.el7.x86_64.rpm


CP2K

Reduced optimization to "-O2"

CC = gcc
FC = gfortran
LD = gfortran
AR = ar -r

CFLAGS = -O2 -march=native -mno-avx512f -I/usr/local/include -L/usr/local/include

DFLAGS = -D__FFTW3 -D__LIBXC -D__LIBINT -D__LIBXSMM -D__SPGLIB

FCFLAGS = $(CFLAGS) $(DFLAGS)
FCFLAGS += -ffree-form -ffree-line-length-none
FCFLAGS += -ftree-vectorize -funroll-loops -std=f2008
FCFLAGS += -I/usr/local/include

LDFLAGS = $(FCFLAGS)

LIBS = /usr/local/lib/libgsl.a /usr/local/lib/libgslcblas.a
LIBS += /usr/local/lib/libxcf03.a /usr/local/lib/libxc.a
LIBS += /usr/local/lib/libint2.a
LIBS += /usr/local/lib/libfftw3.a
LIBS += /usr/local/lib/libxsmmf.a /usr/local/lib/libxsmm.a
LIBS += /usr/local/lib/libopenblas.a
LIBS += -lz -lsymspg -ldl -lpthread -lstdc++

FYPPFLAGS = -n --line-marker-format=gfortran5

make ARCH=local VERSION=sopt test

Summary --------------------------------

Number of FAILED tests 0
Number of WRONG tests 0
Number of CORRECT tests 3201
Number of NEW tests 3
Total number of tests 3204
GREPME 0 0 3201 3 3204 X

Summary: correct: 3201 / 3204; new: 3; 27min
Status: OK

OpenMP

preparation

scl enable devtoolset-8 bash
export CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
export CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
export F77=/opt/rh/devtoolset-8/root/usr/bin/gfortran
export FC=/opt/rh/devtoolset-8/root/usr/bin/gfortran

OpenBLAS

OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.

wget http://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz
mv v0.3.9.tar.gz OpenBLAS-0.3.9.tar.gz
tar -zxvf OpenBLAS-0.3.9.tar.gz
cd OpenBLAS-0.3.9
make CC=gcc FC=gfortran CXX=g++ FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" CXXFLAGS="-O3 -march=native -mno-avx512f" USE_THREAD=1 USE_OPENMP=1 NUM_THREADS=64 LIBNAMESUFFIX=omp NO_SHARED=1 PREFIX=/usr/local NO_AVX512=1

make tests

all tests passed

sudo make PREFIX=/usr/local LIBNAMESUFFIX=omp NO_SHARED=1 install

/usr/local/lib/libopenblas_omp.a


fftw

tar -zxvf fftw-3.3.8.tar.gz
cd fftw-3.3.8
./configure CC=gcc CXX=g++ F77=gfortran FC=gfortran FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" --prefix=/usr/local --enable-openmp --disable-shared --enable-static --enable-threads
make -j 16
make check

FFTW transforms passed basic tests! FFTW threaded transforms passed basic tests!

sudo make install

The OpenMP and threaded versions are created

/usr/local/lib/libfftw3_omp.a
/usr/local/lib/libfftw3_threads.a

Elpa

ELPA replaces the ScaLAPACK SYEVD routine to improve the performance of eigenvalue/eigenvector problems.

wget http://elpa.mpcdf.mpg.de/html/Releases/2019.11.001/elpa-2019.11.001.tar.gz
tar -zxvf elpa-2019.11.001.tar.gz
cd elpa-2019.11.001
./configure FC=/usr/lib64/openmpi4-gcc8/bin/mpifort CC=/usr/lib64/openmpi4-gcc8/bin/mpicc CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f"  --prefix=/usr/local --enable-shared=no --enable-static=yes --enable-openmp=yes --disable-avx512

hack: change LIBS in Makefile: LIBS = /usr/local/lib/libscalapack.a /usr/local/lib/libopenblas_omp.a

make check

some skipped, all others passed

sudo make install

/usr/local/include/elpa_openmp-2019.11.001/modules/elpa_constants.mod
/usr/local/include/elpa_openmp-2019.11.001/modules/elpa_api.mod
/usr/local/include/elpa_openmp-2019.11.001/modules/elpa.mod
/usr/local/lib/libelpa_openmp.a


MPI

preparation

scl enable devtoolset-8 bash
module load mpi/openmpi4-x86_64-gcc8
export CC=/usr/lib64/openmpi4-gcc8/bin/mpicc
export CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx
export F77=/usr/lib64/openmpi4-gcc8/bin/mpif77
export FC=/usr/lib64/openmpi4-gcc8/bin/mpifort

Allow infiniband and vader to run

export OMPI_MCA_btl_openib_allow_ib=1
export OMPI_MCA_btl_vader_single_copy_mechanism=none

Note that the MPI installation must have been built with the same Fortran compiler that is used here. If not, MPI needs to be recompiled with that compiler.


Blas and Lapack

Lapack is needed for SIRIUS

wget https://github.com/Reference-LAPACK/lapack/archive/v3.9.0.tar.gz
mv v3.9.0.tar.gz lapack-3.9.0.tar.gz
tar -zxvf lapack-3.9.0.tar.gz
cd lapack-3.9.0
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_Fortran_COMPILER=gfortran -DBUILD_SHARED_LIBS=OFF -DBUILD_TESTING=ON -DCMAKE_Fortran_FLAGS="-O3 -march=native -mno-avx512f"
make -j 16
make test

Result: 100% tests passed, 0 tests failed out of 103

sudo make install

/usr/local/lib64/libblas.a
/usr/local/lib64/liblapack.a
/usr/local/lib64/libtmglib.a


OpenBLAS

OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.

wget http://github.com/xianyi/OpenBLAS/archive/v0.3.9.tar.gz
mv v0.3.9.tar.gz OpenBLAS-0.3.9.tar.gz
tar -zxvf OpenBLAS-0.3.9.tar.gz
cd OpenBLAS-0.3.9
make CC=gcc FC=gfortran CXX=g++ FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" CXXFLAGS="-O3 -march=native -mno-avx512f" USE_THREAD=0 USE_OPENMP=0 NO_SHARED=1 PREFIX=/usr/local NO_AVX512=1
make tests

all tests passed

sudo make PREFIX=/usr/local NO_SHARED=1 install

/usr/local/lib/libopenblas.a

Scalapack

wget https://www.cp2k.org/static/downloads/scalapack-2.1.0.tgz
tar -zxvf scalapack-2.1.0.tgz
cd scalapack-2.1.0
mkdir build
cd build
cmake .. -DCMAKE_FIND_ROOT_PATH="$ROOTDIR" -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_Fortran_FLAGS="-O3 -march=native -mno-avx512f" -DCMAKE_C_FLAGS="-O3 -march=native -mno-avx512f" -DCMAKE_BUILD_TYPE=Release -DMPI_BASE_DIR=/usr/lib64/openmpi4-gcc8/lib -DMPI_INCLUDE_PATH=/usr/include/openmpi4-x86_64-gcc8 -DMPIEXEC=/usr/lib64/openmpi4-gcc8/bin/mpiexec -DBLAS_LIBRARIES=/usr/local/lib/libopenblas.a -DLAPACK_LIBRARIES=/usr/local/lib/libopenblas.a -DBUILD_SHARED_LIBS=OFF -DBUILD_TESTING=ON
make -j 16
make test

100% tests passed, 0 tests failed out of 96

sudo make install

/usr/local/lib/libscalapack.a

COSMA

Distributed Communication-Optimal Matrix-Matrix Multiplication Algorithm. COSMA is a replacement of the pdgemm routine included in scalapack.

git clone --recursive https://github.com/eth-cscs/COSMA cosma 
cd cosma
mkdir build
cd build
cmake3 .. -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_INSTALL_INCLUDEDIR=/usr/local/include -DCMAKE_BUILD_TYPE=Release -DCOSMA_BLAS=OPENBLAS -DCOSMA_SCALAPACK=CUSTOM -DBUILD_SHARED_LIBS=OFF -DSCALAPACK_LIBRARIES="/usr/local/lib/libscalapack.a;/usr/local/lib/libopenblas.a;/opt/rh/devtoolset-8/root/usr/lib/gcc/x86_64-redhat-linux/8/libgfortran.a;/opt/rh/devtoolset-8/root/usr/lib/gcc/x86_64-redhat-linux/8/libquadmath.a" -DOPENBLAS_LIBRARIES="/usr/local/lib/libopenblas.a;/opt/rh/devtoolset-8/root/usr/lib/gcc/x86_64-redhat-linux/8/libgfortran.a"
make test

20% tests passed, 4 tests failed out of 5

Total Test time (real) = 7.99 sec

The following tests FAILED:
2 - test.multiply_using_layout (Failed)
3 - test.multiply (Failed)
4 - test.scalar_matmul (Failed)
5 - test.pdgemm (Failed)
Errors while running CTest

Place the COSMA library in front of the ScaLAPACK library on the link line.

fftw

tar -zxvf fftw-3.3.8.tar.gz
cd fftw-3.3.8
module load mpi/openmpi4-x86_64-gcc8
export OMPI_MCA_btl_openib_allow_ib=1
export OMPI_MCA_btl_vader_single_copy_mechanism=none
export OMPI_MCA_btl=openib,self,vader
export OMPI_MCA_pml=^ucx 
./configure CC=/usr/lib64/openmpi4-gcc8/bin/mpicc CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx F77=/usr/lib64/openmpi4-gcc8/bin/mpifort FC=/usr/lib64/openmpi4-gcc8/bin/mpifort FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" --prefix=/usr/local  --enable-openmp --enable-mpi --disable-shared --enable-static --enable-threads
make -j 16
make check

FFTW transforms passed basic tests!
FFTW threaded transforms passed basic tests!
MPI FFTW transforms passed 10 tests, 1 CPU
MPI FFTW transforms passed 10 tests, 2 CPUs
MPI FFTW transforms passed 10 tests, 3 CPUs
MPI FFTW transforms passed 10 tests, 4 CPUs
MPI FFTW threaded transforms passed 10 tests!

sudo make install

The serial, mpi, threaded, and openmp versions are created

/usr/local/lib/libfftw3.a
/usr/local/lib/libfftw3_mpi.a
/usr/local/lib/libfftw3_omp.a
/usr/local/lib/libfftw3_threads.a

Gsl

Reduced optimization to "-O1"

wget http://gnu.askapache.com/gsl/gsl-2.6.tar.gz
tar -zxvf gsl-2.6.tar.gz
./configure --prefix=/usr/local CC=/usr/lib64/openmpi4-gcc8/bin/mpicc FC=/usr/lib64/openmpi4-gcc8/bin/mpifort CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx FCFLAGS="-O1 -march=native -mno-avx512f" CFLAGS="-O1 -march=native -mno-avx512f" CXXFLAGS="-O1 -march=native -mno-avx512f" --enable-shared=no
make -j 16
make check

============================================================================
Testsuite summary for gsl 2.6
============================================================================
all checks passed

sudo make install

/usr/local/lib/libgslcblas.a
/usr/local/lib/libgsl.a

Libint2

Reduced optimization to "-O2"

wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-4.tgz
wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-5.tgz
wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-6.tgz
wget https://github.com/cp2k/libint-cp2k/releases/download/v2.6.0/libint-v2.6.0-cp2k-lmax-7.tgz
tar -zxvf libint-v2.6.0-cp2k-lmax-5.tgz
cd libint-v2.6.0-cp2k-lmax-5
./configure --prefix=/usr/local CC=/usr/lib64/openmpi4-gcc8/bin/mpicc FC=/usr/lib64/openmpi4-gcc8/bin/mpifort CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx FCFLAGS="-O2 -march=native -mno-avx512f" CFLAGS="-O2 -march=native -mno-avx512f" CXXFLAGS="-O2 -march=native -mno-avx512f" --enable-shared=no --enable-fortran --with-cxx-optflags="-O2 -march=native -mno-avx512f"
make -j 24
make -j 24 check

all tests ok

sudo make install
sudo make install_fortran

/usr/local/lib/libint2.a

Libxsmm

A library for improved performance for matrix multiplication and deep learning primitives

wget https://www.cp2k.org/static/downloads/libxsmm-1.15.tar.gz
tar -zxvf libxsmm-1.15.tar.gz
make PREFIX=/usr/local CC=/usr/lib64/openmpi4-gcc8/bin/mpicc FC=/usr/lib64/openmpi4-gcc8/bin/mpifort CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" STATIC=1 NO_SHARED=1 MALLOC=0  -j 24

all 16 test okay

make PREFIX=/usr/local STATIC=1 NO_SHARED=1 install

/usr/local/lib/libxsmm.a
/usr/local/lib/libxsmmext.a
/usr/local/lib/libxsmmf.a
/usr/local/lib/libxsmmgen.a
/usr/local/lib/libxsmmnoblas.a

Libxc

wget https://www.cp2k.org/static/downloads/libxc-4.3.4.tar.gz
tar -zxvf libxc-4.3.4.tar.gz
cd libxc-4.3.4
./configure --prefix=/usr/local CC=/usr/lib64/openmpi4-gcc8/bin/mpicc FC=/usr/lib64/openmpi4-gcc8/bin/mpifort CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" CXXFLAGS="-O3 -march=native -mno-avx512f" --enable-shared=no --enable-static=yes --enable-fortran
make -j 24
make check

PASS: xc-run_testsuite
============================================================================
Testsuite summary for libxc 4.3.4
============================================================================
# TOTAL: 1
# PASS: 1

sudo make install
sudo cp src/*.mod /usr/local/include/

/usr/local/lib/libxc.a
/usr/local/lib/libxcf03.a
/usr/local/lib/libxcf90.a

libvdwxc

Libvdwxc is a general library for evaluating energy and potential for exchange-correlation (XC) functionals from the vdW-DF.

wget https://www.cp2k.org/static/downloads/libvdwxc-0.4.0.tar.gz
tar -zxvf libvdwxc-0.4.0.tar.gz
./configure CC=/usr/lib64/openmpi4-gcc8/bin/mpicc FC=/usr/lib64/openmpi4-gcc8/bin/mpifort FCFLAGS="-O3 -march=native -mno-avx512f" FFTW3_INCLUDES="-I/usr/local/include/" FFTW3_LIBS="/usr/local/lib/libfftw3_mpi.a /usr/local/lib/libfftw3.a" CFLAGS="-O3 -march=native -mno-avx512f"  --prefix=/usr/local --enable-shared=no --enable-static=yes
make -j 24
make check
sudo make install


METIS

tar -zxvf parmetis-4.0.3.tar.gz
cd parmetis-4.0.3
cd metis
make config prefix=/share/apps/METIS CC=/usr/lib64/openmpi4-gcc8/bin/mpicc FC=/usr/lib64/openmpi4-gcc8/bin/mpifort CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" CXXFLAGS="-O3 -march=native -mno-avx512f"
make -j 24
sudo make install

/share/apps/METIS/lib/libmetis.a

ParMETIS

wget https://www.cp2k.org/static/downloads/parmetis-4.0.3.tar.gz
tar -zxvf parmetis-4.0.3.tar.gz
cd parmetis-4.0.3
make config prefix=/share/apps/ParMETIS CC=/usr/lib64/openmpi4-gcc8/bin/mpicc FC=/usr/lib64/openmpi4-gcc8/bin/mpifort CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f" CXXFLAGS="-O3 -march=native -mno-avx512f"
make -j 24
sudo make install

/share/apps/ParMETIS/lib/libparmetis.a

Superlu-6.1.1

wget https://github.com/xiaoyeli/superlu_dist/archive/v6.1.1.tar.gz
mv v6.1.1.tar.gz superlu_dist_6.1.1.tar.gz
tar -zxvf superlu_dist_6.1.1.tar.gz
mkdir build1
cd build1
cmake .. -DCMAKE_FIND_ROOT_PATH="$ROOTDIR" -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DMPI_C_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicc -DMPI_CXX_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicxx -DMPI_Fortran_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpifort  -DTPL_PARMETIS_LIBRARIES="-L/share/apps/METIS/lib -L/share/apps/ParMETIS/lib /share/apps/ParMETIS/lib/libparmetis.a /share/apps/METIS/lib/libmetis.a -pthread -Wl,-rpath -Wl,/usr/lib64/openmpi4-gcc8/lib -Wl,--enable-new-dtags -L/usr/lib64/openmpi4-gcc8/lib -lmpi_mpifh -lmpi_cxx -lmpi" -DTPL_PARMETIS_INCLUDE_DIRS=/share/apps/ParMETIS/include -DBUILD_STATIC_LIBS=OFF -DCMAKE_C_FLAGS="-std=c99 -O3 -march=native -mno-avx512f" -DCMAKE_Fortran_FLAGS="-O3 -march=native -mno-avx512f" -DTPL_LAPACK_LIBRARIES=/usr/local/lib/libscalapack.a -DTPL_BLAS_LIBRARIES=/usr/local/lib/libopenblas.a
make test

all 24 tests passed

sudo make install

/usr/local/lib/libsuperlu_dist.a

Superlu-5.1.2

wget http://crd-legacy.lbl.gov/~xiaoye/SuperLU/superlu_dist_5.1.2.tar.gz
tar -zxvf superlu_dist_5.1.2.tar.gz
mkdir build
cd build
cmake .. -DCMAKE_FIND_ROOT_PATH="$ROOTDIR" -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DMPI_C_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicc -DMPI_CXX_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicxx -DMPI_Fortran_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpifort  -DTPL_PARMETIS_LIBRARIES="-L/share/apps/METIS/lib -L/share/apps/ParMETIS/lib /share/apps/ParMETIS/lib/libparmetis.a /share/apps/METIS/lib/libmetis.a -pthread -Wl,-rpath -Wl,/usr/lib64/openmpi4-gcc8/lib -Wl,--enable-new-dtags -L/usr/lib64/openmpi4-gcc8/lib -lmpi_mpifh -lmpi_cxx -lmpi" -DTPL_PARMETIS_INCLUDE_DIRS=/share/apps/ParMETIS/include -DCMAKE_C_FLAGS="-std=c99 -O3 -march=native -mno-avx512f" -DCMAKE_Fortran_FLAGS="-O3 -march=native -mno-avx512f" -DTPL_BLAS_LIBRARIES=/usr/local/lib/libopenblas_omp.a
make test

Running tests...
Test project /usr/local/src/SuperLU_DIST_5.1.2/build
Start 1: pddrive
1/2 Test #1: pddrive .......................... Passed 79.70 sec
Start 2: pddrive1
2/2 Test #2: pddrive1 ......................... Passed 78.40 sec

100% tests passed, 0 tests failed out of 2

Total Test time (real) = 158.10 sec

sudo make install

/usr/local/lib/libsuperlu_dist.a


Elpa

ELPA replaces the ScaLAPACK SYEVD routine to improve the performance of eigenvalue/eigenvector problems.

wget http://elpa.mpcdf.mpg.de/html/Releases/2019.11.001/elpa-2019.11.001.tar.gz
tar -zxvf elpa-2019.11.001.tar.gz
cd elpa-2019.11.001
./configure FC=/usr/lib64/openmpi4-gcc8/bin/mpifort CC=/usr/lib64/openmpi4-gcc8/bin/mpicc CXX=/usr/lib64/openmpi4-gcc8/bin/mpicxx FCFLAGS="-O3 -march=native -mno-avx512f" CFLAGS="-O3 -march=native -mno-avx512f"  --prefix=/usr/local --enable-shared=no --enable-static=yes --enable-openmp=no --disable-avx512

hack: change LIBS in Makefile: LIBS = /usr/local/lib/libscalapack.a /usr/local/lib/libopenblas.a

make check

some skipped, all others passed

sudo make install

/usr/local/include/elpa-2019.11.001/modules/elpa_constants.mod
/usr/local/include/elpa-2019.11.001/modules/elpa_api.mod
/usr/local/include/elpa-2019.11.001/modules/elpa.mod
/usr/local/lib/libelpa.a

Pexsi 1.2

The source code must be edited for use with openmpi-4.

MPI_Address → MPI_Get_address
MPI_Type_hindexed → MPI_Type_create_hindexed
MPI_Type_struct → MPI_Type_create_struct
MPI_Errhandler_set → remove lines

The Pole EXpansion and Selected Inversion (PEXSI) method requires the PEXSI library and two dependencies (ParMETIS and SuperLU_DIST).

wget https://www.cp2k.org/static/downloads/pexsi_v1.2.0.tar.gz
tar -zxvf pexsi_v1.2.0.tar.gz
mkdir build
cd build
cmake3 .. -DCMAKE_FIND_ROOT_PATH="$ROOTDIR" -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DMPI_C_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicc -DMPI_CXX_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicxx -DMPI_Fortran_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpifort -DMETIS_LIBRARIES=/share/apps/METIS/lib/libmetis.a -DPARMETIS_LIBRARIES=/share/apps/ParMETIS/lib/libparmetis.a -Dlinalg_BLAS_LIBRARIES=/usr/local/lib/libopenblas.a -Dlinalg_LAPACK_LIBRARIES=/usr/local/lib/libscalapack.a -DCMAKE_CXX_FLAGS="-O3 -march=native -mno-avx512f" -DCMAKE_C_FLAGS="-std=c99 -O2 -march=native  -mno-avx512f" -DCMAKE_Fortran_FLAGS="-O3 -march=native  -mno-avx512f"

Edit cmake/modules/FindLinAlg.cmake and uncomment lines 373 and 375.

make -j 24
sudo make install
sudo make finstall

Pexsi 0.10.2

cp config/make.inc.linux.gnu ./make.inc

PAR_ND_LIBRARY = parmetis
SEQ_ND_LIBRARY = metis

CC = /usr/lib64/openmpi4-gcc8/bin/mpicc
CXX = /usr/lib64/openmpi4-gcc8/bin/mpicxx
FC = /usr/lib64/openmpi4-gcc8/bin/mpifort
LOADER = /usr/lib64/openmpi4-gcc8/bin/mpicxx

DSUPERLU_DIR = /usr/local
METIS_DIR = /share/apps/METIS
PARMETIS_DIR = /share/apps/ParMETIS

DSUPERLU_INCLUDE = -I/usr/local/include

GFORTRAN_LIB = /opt/rh/devtoolset-8/root/usr/lib/gcc/x86_64-redhat-linux/8/libgfortran.a
LAPACK_LIB = /usr/local/lib/libscalapack.a
BLAS_LIB = /usr/local/lib/libopenblas.a

The source code must be edited for use with openmpi-4.

MPI_Address → MPI_Get_address
MPI_Type_hindexed → MPI_Type_create_hindexed
MPI_Type_struct → MPI_Type_create_struct

make
make install
sudo cp /usr/local/src/pexsi_v0.10.2/build/lib/libpexsi_linux_v0.10.2.a /usr/local/lib
sudo cp /usr/local/src/pexsi_v0.10.2/build/include/* /usr/local/include/

SpFFT

Needed in SIRIUS.

wget https://github.com/eth-cscs/SpFFT/archive/v0.9.9.tar.gz
mv v0.9.9.tar.gz SpFFT-0.9.9.tar.gz
tar -zxvf SpFFT-0.9.9.tar.gz
cd SpFFT-0.9.9
mkdir build
cd build
cmake3 .. -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DMPI_C_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicc -DMPI_CXX_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicxx -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=TRUE -DSPFFT_OMP=ON -DSPFFT_MPI=ON -DSPFFT_INSTALL=ON -DSPFFT_STATIC=ON
sudo make install

/usr/local/lib/libspfft.a

SIRIUS 6.5.3

SIRIUS is a domain specific library for electronic structure calculations.

wget https://github.com/electronic-structure/SIRIUS/archive/v6.5.3.tar.gz
mv v6.5.3.tar.gz SIRIUS-v6.5.3.tar.gz
tar -zxvf SIRIUS-v6.5.3.tar.gz
cd SIRIUS-6.5.3
mkdir build
cd build
cmake3 .. -DCMAKE_INSTALL_PREFIX=/share/apps/SIRIUS -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DMPI_C_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicc -DMPI_CXX_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicxx -DMPI_Fortran_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpifort  -DGSL_CBLAS_LIBRARY=/usr/local/lib/libgslcblas.a -DGSL_CBLAS_LIBRARY_DEBUG=/usr/local/lib/libgslcblas.a -DGSL_LIBRARY=/usr/local/lib/libgsl.a -DGSL_LIBRARY_DEBUG=/usr/local/lib/libgsl.a -DUSE_ELPA=ON -DELPA_INCLUDE_DIR=/usr/local/include/elpa-2019.11.001/elpa -DELPA_LIBRARIES="/usr/local/lib/libelpa.a;/usr/local/lib/libscalapack.a;/usr/local/lib/libopenblas.a" -DUSE_VDWXC=ON -DLIBVDWXC_LIBRARIES="/usr/local/lib/libvdwxc.a;/usr/local/lib/libfftw3.a"
make -j 24
make install

SIRIUS 6.3.2

wget https://www.cp2k.org/static/downloads/SIRIUS-6.3.2.tar.gz
mkdir build
cd build
cmake3 .. -DCMAKE_INSTALL_PREFIX=/share/apps/SIRIUS -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DMPI_C_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicc -DMPI_CXX_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpicxx -DMPI_Fortran_COMPILER=/usr/lib64/openmpi4-gcc8/bin/mpifort  -DUSE_SCALAPACK=ON -DSCALAPACK_LIBRARIES=/usr/local/lib/libscalapack.a -DBLAS_openblas_LIBRARY=/usr/local/lib/libopenblas.a  -DLAPACK_openblas_LIBRARY=/usr/local/lib/libopenblas.a -DGSL_CBLAS_LIBRARY=/usr/local/lib/libgslcblas.a -DGSL_CBLAS_LIBRARY_DEBUG=/usr/local/lib/libgslcblas.a -DGSL_LIBRARY=/usr/local/lib/libgsl.a -DGSL_LIBRARY_DEBUG=/usr/local/lib/libgsl.a
make -j 24
make install -j 24
cp src/mod_files/sirius.mod /share/apps/SIRIUS/include/

Plumed

Library that enables various enhanced sampling methods.

tar -zxvf plumed-2.6.0.tgz
./configure --prefix=/share/apps/plumed/plumed-2.6.0 --enable-shared=no

change in Makefile.conf

DYNAMIC_LIBS= -lstdc++ -lfftw3 -lgsl -lgslcblas -lz -ldl -llapack -lblas   -fopenmp

to

DYNAMIC_LIBS= -lstdc++ /usr/local/lib/libfftw3.a /usr/local/lib/libgsl.a /usr/local/lib/libgslcblas.a -lz -ldl /usr/local/lib/libopenblas_omp.a -fopenmp -lgfortran

make -j 24
make install

QUIP

Do not build with parallel make (i.e. do not pass -j to make).

QUantum mechanics and Interatomic Potentials allows for a wider range of interaction potentials.

wget https://www.cp2k.org/static/downloads/QUIP-cc83ceea5776c40fcb5ab224a25ab04d62175449.zip
export QUIP_ARCH=linux_x86_64_gfortran_openmpi
make config

Please enter the linking options for LAPACK and BLAS libraries:

fill in /usr/local/lib/libopenblas.a

Do you want to compile with CP2K support? y
Do you want to compile with VASP support? y
Do you want to compile with Tight Binding (TB) support? y
Please enter directories where METIS libraries are kept:

fill in /share/apps/METIS/lib/
make libquip
mkdir -p /share/apps/QUIP/QUIP-2015
make PREFIX=/share/apps/QUIP/QUIP-2015 QUIP_INSTALLDIR=/share/apps/QUIP/QUIP-2015 install
mkdir /share/apps/QUIP/QUIP-2015/include/
cp build/linux_x86_64_gfortran_openmpi/quip_unified_wrapper_module.mod /share/apps/QUIP/QUIP-2015/include/
mkdir /share/apps/QUIP/QUIP-2015/lib/
cp build/linux_x86_64_gfortran_openmpi/*.a /share/apps/QUIP/QUIP-2015/lib
cp src/FoX-4.0.3/objs.linux_x86_64_gfortran_openmpi/lib/libFoX_*.a /share/apps/QUIP/QUIP-2015/lib

CP2K

Reduced optimization to "-O2"

CC = /usr/lib64/openmpi4-gcc8/bin/mpicc
FC = /usr/lib64/openmpi4-gcc8/bin/mpifort
LD = /usr/lib64/openmpi4-gcc8/bin/mpifort
AR = ar -r

CFLAGS = -O2 -g -march=native -mno-avx512f -I/usr/local/include/ -L/usr/local/include/

DFLAGS = -D__MPI_VERSION=4 -D__parallel -D__SCALAPACK
DFLAGS += -D__FFTW3 -D__LIBXC -D__LIBINT -D__LIBXSMM -D__SPGLIB

FCFLAGS = $(CFLAGS) $(DFLAGS)
FCFLAGS += -ffree-form -ffree-line-length-none
FCFLAGS += -ftree-vectorize -funroll-loops -std=f2008
FCFLAGS += -I/usr/local/include -I/share/apps/QUIP/QUIP-2015/include/ -I/share/apps/METIS/include/ -I/share/apps/ParMETIS/include -I/share/apps/plumed/plumed-2.6.0/include/ -I/share/apps/SIRIUS/include/

LDFLAGS = $(FCFLAGS)

LIBS = /usr/local/lib/libgsl.a /usr/local/lib/libgslcblas.a
LIBS += /usr/local/lib/libxcf03.a /usr/local/lib/libxc.a
LIBS += /usr/local/lib/libint2.a
LIBS += /usr/local/lib/libfftw3.a
LIBS += /usr/local/lib/libxsmmf.a /usr/local/lib/libxsmm.a
LIBS += /usr/local/lib/libscalapack.a /usr/local/lib/libopenblas.a
LIBS += -lz -lsymspg -ldl -lpthread -lstdc++ -lgomp -lmpi_mpifh -lmpi_cxx -lmpi

FYPPFLAGS = -n --line-marker-format=gfortran5


Summary --------------------------------

Number of FAILED tests 0
Number of WRONG tests 0
Number of CORRECT tests 3261
Number of NEW tests 8
Total number of tests 3269
GREPME 0 0 3261 8 3269 X

Summary: correct: 3261 / 3269; new: 8; 38min
Status: OK


To resolve the SIRIUS warnings at the top of the output, use the latest version of SIRIUS and replace the following CP2K source files:

cd cp2k-7.1
cd src
wget https://raw.githubusercontent.com/cp2k/cp2k/b1fc8cddc7ca9b30bf6de29e87ac9e3f14c60136/src/sirius_interface.F
wget https://raw.githubusercontent.com/cp2k/cp2k/b1fc8cddc7ca9b30bf6de29e87ac9e3f14c60136/src/input_cp2k_pwdft.F

# Tested with: GFortran 8.3.1, OpenMPI 4.0.3, OpenBLAS 0.3.9, ScaLAPACK 2.1.0, FFTW 3.3.8, Libint 2.6.0, Libxc 4.3.4, libxsmm 1.15,
# ELPA 2019.11.001, PLUMED 2.6.0, SPGLIB 1.12.2, QUIP-2015, ParMETIS-4.0.3, Pexsi-1.2.0, SIRIUS 6.5.3, SpFFT 0.9.9, libvdwxc 0.4.0
# Author: David Dubbeldam (d.dubbeldam@uva.nl, University of Amsterdam, May 2020)

CC = /usr/lib64/openmpi4-gcc8/bin/mpicc
FC = /usr/lib64/openmpi4-gcc8/bin/mpifort
LD = /usr/lib64/openmpi4-gcc8/bin/mpifort
AR = ar -r

CFLAGS = -O2 -g -march=native -mno-avx512f -I/usr/local/include/ -L/usr/local/include/

DFLAGS = -D__MPI_VERSION=4 -D__parallel -D__SCALAPACK
DFLAGS += -D__FFTW3 -D__LIBXC -D__LIBINT -D__LIBXSMM -D__SPGLIB
DFLAGS += -D__SIRIUS -D__PLUMED2 -D__QUIP -D__LIBPEXSI -D__ELPA

FCFLAGS = $(CFLAGS) $(DFLAGS)
FCFLAGS += -ffree-form -ffree-line-length-none
FCFLAGS += -ftree-vectorize -funroll-loops -std=f2008
FCFLAGS += -I/usr/local/include -I/share/apps/QUIP/QUIP-2015/include/ -I/share/apps/METIS/include/ -I/share/apps/ParMETIS/include
FCFLAGS += -I/share/apps/plumed/plumed-2.6.0/include/ -I/share/apps/SIRIUS/include/sirius
FCFLAGS += -I/usr/local/include/elpa-2019.11.001/elpa -I/usr/local/include/elpa-2019.11.001/modules

LDFLAGS = $(FCFLAGS)

LIBS = $(PLUMED_DEPENDENCIES) /share/apps/plumed/plumed-2.6.0/lib/libplumed.a
LIBS += /usr/local/lib/libelpa.a
LIBS += /usr/local/lib64/libpexsi.a
LIBS += /share/apps/SIRIUS/lib64/libsirius.a
LIBS += /usr/local/lib/libspfft.a
LIBS += /usr/local/lib/libvdwxc.a
LIBS += /usr/local/lib64/libsuperlu_dist.a
LIBS += /share/apps/QUIP/QUIP-2015/lib/libquip_core.a /share/apps/QUIP/QUIP-2015/lib/libatoms.a
LIBS += /share/apps/QUIP/QUIP-2015/lib/libFoX_sax.a
LIBS += /share/apps/QUIP/QUIP-2015/lib/libFoX_common.a
LIBS += /share/apps/QUIP/QUIP-2015/lib/libFoX_utils.a
LIBS += /share/apps/QUIP/QUIP-2015/lib/libFoX_fsys.a
LIBS += /share/apps/ParMETIS/lib/libparmetis.a /share/apps/METIS/lib/libmetis.a
LIBS += /usr/local/lib/libgsl.a /usr/local/lib/libgslcblas.a
LIBS += /usr/local/lib/libxcf03.a /usr/local/lib/libxc.a
LIBS += /usr/local/lib/libint2.a
LIBS += /usr/local/lib/libfftw3.a
LIBS += /usr/local/lib/libxsmmf.a /usr/local/lib/libxsmm.a
LIBS += /usr/local/lib/libscalapack.a /usr/local/lib/libopenblas.a
LIBS += -lnetcdf -lhdf5_hl -lhdf5 -lz -lsymspg -ldl -lpthread -lstdc++ -lgomp -lmpi_mpifh -lmpi_cxx -lmpi

FYPPFLAGS = -n --line-marker-format=gfortran5

All tests pass except for one regression-test job that hangs; after killing that job, the test results are:

Summary --------------------------------
Number of FAILED tests 1
Number of WRONG tests 0
Number of CORRECT tests 3285
Number of NEW tests 0
Total number of tests 3286
GREPME 1 0 3285 0 3286 X

Summary: correct: 3285 / 3286; failed: 1; 95min
Status: FAILED