diff --git a/.github/contributing.md b/.github/contributing.md new file mode 100644 index 0000000000..45ebe372fb --- /dev/null +++ b/.github/contributing.md @@ -0,0 +1,31 @@ +# License and Signed-off-by token + +In order to ensure that we can keep distributing hwloc under our +[open source license](/COPYING), we need to ensure that all +contributions are compatible with that license. + +To that end, we require that all Git commits contributed to hwloc +have a "Signed-off-by" token indicating that the commit author agrees +with [Open MPI's Contributor's +Declaration](https://github.com/open-mpi/ompi/wiki/Administrative-rules#contributors-declaration). + +If you have not already done so, please ensure that: + +1. Every commit contains exactly the "Signed-off-by" token. You can +add this token via `git commit -s`. +1. The email address after "Signed-off-by" must match the Git commit +email address. + +# Copyright + +You may also update the copyright headers whenever you modify +a file. `contrib/update-my-copyright.pl` may help you doing so. +It requires you to set the `HWLOC_COPYRIGHT_FORMAT_NAME` environment +to something like `Inria. All rights reserved.` +and `HWLOC_COPYRIGHT_SEARCH_NAME` to something like `Inria`. + +You may even call `contrib/update-my-copyright.pl --check-only` from +the git pre-commit hook so that it prevents committing without +updated copyright headers (unless `--no-verify` is given). + +Major contributors are also listed in the [Authors](/AUTHORS) file. diff --git a/.github/issue_template.md b/.github/issue_template.md index a35599ec7a..0e04730177 100644 --- a/.github/issue_template.md +++ b/.github/issue_template.md @@ -27,9 +27,8 @@ If your issue consists in a wrong topology detection, we also need the following for debugging remotely: * On Linux, run `hwloc-gather-topology myhost` and post the `myhost.*` files that it will generate. Note that this tool may be slow on large nodes or when I/O is enabled. 
-* On MacOS, `sysctl hw` and `sysctl machdep.cpu` -* On Solaris, `kstat cpu_info` and `lgrp_info -a` -* On BSD, `sysctl hw` +* On Solaris, `kstat -C cpu_info`, `lgrpinfo -a` and `psrinfo` +* On MacOS and BSD, `sysctl hw` and `sysctl machdep` * On BSD x86 platforms, if using hwloc >= 2.0, also run `hwloc-gather-cpuid` and post an archive of the `cpuid` that was generated. * On Windows, `coreinfo -cgnlsm` @@ -38,3 +37,4 @@ If your issue consists in a wrong topology detection, we also need the following You may need to archive the output from the above commands into a `.zip` or `.tar.gz` (not `.bz2`!) file before Github will allow you to drag-n-drop the file into the issue to attach it. Note that upgrading your operating system (e.g., Linux kernel) and platform firmwares (e.g., BIOS) might help solving issues about wrong topology detection. +Some known issues are listed at https://github.com/open-mpi/hwloc/wiki/Linux-kernel-bugs diff --git a/.gitignore b/.gitignore index 0541e31fe3..bee2b5d5bf 100644 --- a/.gitignore +++ b/.gitignore @@ -112,7 +112,6 @@ test-suite.log /tests/hwloc/hwloc_iodevs /tests/hwloc/xmlbuffer /tests/hwloc/gl -/tests/hwloc/intel-mic /tests/hwloc/linux-libnuma /tests/hwloc/glibc-sched /tests/hwloc/openfabrics-verbs @@ -130,6 +129,8 @@ test-suite.log /tests/hwloc/x86/test-topology.sh +/tests/hwloc/x86+linux/test-topology.sh + /tests/hwloc/xml/test-topology.sh /tests/hwloc/embedded/aclocal.m4 @@ -197,6 +198,7 @@ test-suite.log /utils/lstopo/lstopo-no-graphics /utils/lstopo/lstopo-no-graphics.1 /utils/lstopo/test-lstopo.sh +/utils/lstopo/test-lstopo-shmem.sh /utils/lstopo/test-*.sh.log /utils/lstopo/test-*.sh.trs diff --git a/NEWS b/NEWS index 9b2d0ee1f0..3418fd6797 100644 --- a/NEWS +++ b/NEWS @@ -16,9 +16,16 @@ bug fixes (and other actions) for each version of hwloc since version 0.9. +Version 2.2.0 +------------- +* Remove support for Intel Xeon Phi (MIC, Knights Corner) co-processors. 
+ + Version 2.1.0 ------------- * API + + Add a new "Die" object (HWLOC_OBJ_DIE) for upcoming x86 processors + with multiple dies per package, in the x86 and Linux backends. + Add the new HWLOC_OBJ_MEMCACHE object type for memory-side caches. - They are filtered-out by default, except in command-line tools. - They are only available on very recent platforms running Linux 5.2+ @@ -34,22 +41,32 @@ Version 2.1.0 + Improve the API for dealing with disallowed resources - HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM is replaced with FLAG_INCLUDE_DISALLOWED and --whole-system command-line options with --disallowed. - Former names are still accepted for backward compatibility. + . Former names are still accepted for backward compatibility. - Add hwloc_topology_allow() for changing allowed sets after load(). - Add the HWLOC_ALLOW=all environment variable to totally ignore administrative restrictions such as Linux Cgroups. - Add disallowed_pu and disallowed_numa bits to the discovery support structure. + + Group objects have a new "dont_merge" attribute to prevent them from + being automatically merged with identical parent or children. + + Add more distances-related features: + - Add hwloc_distances_get_name() to retrieve a string describing + what a distances structure contains. + - Add hwloc_distances_get_by_name() to retrieve distances structures + based on their name. + - Add hwloc_distances_release_remove() + - Distances may now cover objects of different types with new kind + HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES. * Backends + + Add support for Linux 5.3 new sysfs cpu topology files with Die information. + Add support for Intel v2 Extended Topology Enumeration in the x86 backend. - Tiles, Modules and Dies are exposed as Groups for now. + Improve memory locality on Linux by using HMAT initiators (exposed since Linux 5.2+), and NUMA distances for CPU-less NUMA nodes. - + The x86 now properly handles offline CPUs. + + The x86 backend now properly handles offline CPUs. 
+ Detect the locality of NVIDIA GPU OpenCL devices. + Ignore NUMA nodes that correspond to NVIDIA GPU by default. - - They may be kept if HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=1 in the environment. - Fix their CPU locality and add info attributes to identify them. + - They may be unignored if HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=1 in the environment. + - Fix their CPU locality and add info attributes to identify them. Thanks to Max Katz and Edgar Leon for the help. + Add support for IBM S/390 drawers. + Rework the heuristics for discovering KNL Cluster and Memory modes @@ -60,12 +77,17 @@ Version 2.1.0 + Expose Linux DAX devices as hwloc Block OS devices. + Remove support for /proc/cpuinfo-only topology discovery in Linux kernel prior to 2.6.16. + + Disable POWER device-tree-based topology on Linux by default. + - It may be reenabled by setting HWLOC_USE_DT=1 in the environment. + + Discovery components are now divided in phases that may be individually + blacklisted. + - The linuxio component has been merged back into the linux component. * Tools + lstopo - lstopo factorizes objects by default in the graphical output when there are more than 4 identical children. - New options --no-factorize and --factorize may be used to configure this. - Also hit the 'f' key to disable factorizing in interactive outputs. + . New options --no-factorize and --factorize may be used to configure this. + . Hit the 'f' key to disable factorizing in interactive outputs. - Both logical and OS/physical indexes are now displayed by default for PU and NUMA nodes. - The X11 and Windows interactive outputs support many keyboard @@ -78,9 +100,27 @@ Version 2.1.0 See dynamic_SVG_example.html for an example. + Add --nodeset options to hwloc-calc for converting between cpusets and nodesets. + + Add --no-smt to lstopo, hwloc-bind and hwloc-calc to ignore multiple + PU in SMT cores. + hwloc-annotate may annotate multiple locations at once. + Add a HTML/JS version of hwloc-ps. 
See contrib/hwloc-ps.www/README. + Add bash completions. +* Misc + + Add several FAQ entries in "Compatibility between hwloc versions" + about API version, ABI, XML, Synthetic strings, and shmem topologies. + + +Version 2.0.4 (also included in 1.11.13 when appropriate) +------------- +* Add support for Linux 5.3 new sysfs cpu topology files with Die information. +* Add support for Intel v2 Extended Topology Enumeration in the x86 backend. +* Tiles, Modules and Dies are exposed as Groups for now. + + HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent + Die groups from being automatically merged with identical parent or children. +* Ignore NUMA node information from AMD topoext in the x86 backend, + unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment. +* Group objects have a new "dont_merge" attribute to prevent them from + being automatically merged with identical parent or children. Version 2.0.3 (also included in 1.11.12 when appropriate) @@ -267,6 +307,19 @@ Version 2.0.0 + hwloc now requires a C99 compliant compiler. +Version 1.11.13 (also included in 2.0.4) +--------------- +* Add support for Linux 5.3 new sysfs cpu topology files with Die information. +* Add support for Intel v2 Extended Topology Enumeration in the x86 backend. +* Tiles, Modules and Dies are exposed as Groups for now. + + HWLOC_DONT_MERGE_DIE_GROUPS=1 may be set in the environment to prevent + Die groups from being automatically merged with identical parent or children. +* Ignore NUMA node information from AMD topoext in the x86 backend, + unless HWLOC_X86_TOPOEXT_NUMANODES=1 is set in the environment. +* Group objects have a new "dont_merge" attribute to prevent them from + being automatically merged with identical parent or children. 
+ + Version 1.11.12 (also included in 2.0.3) --------------- * Fix a corner case of hwloc_topology_restrict() where children would diff --git a/VERSION b/VERSION index 114e340db3..57bb8410b0 100644 --- a/VERSION +++ b/VERSION @@ -8,7 +8,7 @@ # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. major=2 -minor=1 +minor=2 release=0 # greek is used for alpha or beta release tags. If it is non-empty, diff --git a/config/hwloc.m4 b/config/hwloc.m4 index 4554eeb01c..82a7d38480 100644 --- a/config/hwloc.m4 +++ b/config/hwloc.m4 @@ -1,6 +1,6 @@ dnl -*- Autoconf -*- dnl -dnl Copyright © 2009-2018 Inria. All rights reserved. +dnl Copyright © 2009-2019 Inria. All rights reserved. dnl Copyright © 2009-2012, 2015-2017 Université Bordeaux dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology @@ -217,8 +217,7 @@ EOF]) AC_MSG_RESULT([Linux]) hwloc_components="$hwloc_components linux" if test "x$enable_io" != xno; then - hwloc_components="$hwloc_components linuxio" - AC_DEFINE(HWLOC_HAVE_LINUXIO, 1, [Define to 1 if building the Linux I/O component]) + AC_DEFINE(HWLOC_HAVE_LINUXIO, 1, [Define to 1 for I/O discovery in the Linux component]) hwloc_linuxio_happy=yes if test x$enable_pci != xno; then AC_DEFINE(HWLOC_HAVE_LINUXPCI, 1, [Define to 1 if enabling Linux-specific PCI discovery in the Linux I/O component]) @@ -1220,11 +1219,13 @@ return clGetDeviceIDs(0, 0, 0, NULL, NULL); AS_IF([test "$hwloc_mode" = "embedded"], [HWLOC_EMBEDDED_CFLAGS=$HWLOC_CFLAGS HWLOC_EMBEDDED_CPPFLAGS=$HWLOC_CPPFLAGS + HWLOC_EMBEDDED_LDFLAGS=$HWLOC_LDFLAGS HWLOC_EMBEDDED_LDADD='$(HWLOC_top_builddir)/hwloc/libhwloc_embedded.la' HWLOC_EMBEDDED_LIBS=$HWLOC_LIBS HWLOC_LIBS=]) AC_SUBST(HWLOC_EMBEDDED_CFLAGS) AC_SUBST(HWLOC_EMBEDDED_CPPFLAGS) + AC_SUBST(HWLOC_EMBEDDED_LDFLAGS) AC_SUBST(HWLOC_EMBEDDED_LDADD) AC_SUBST(HWLOC_EMBEDDED_LIBS) diff --git a/config/hwloc_internal.m4 b/config/hwloc_internal.m4 index 9eb4813137..a146114764 100644 --- 
a/config/hwloc_internal.m4 +++ b/config/hwloc_internal.m4 @@ -438,6 +438,7 @@ int foo(void) { hwloc_config_prefix[tests/hwloc/linux/allowed/Makefile] hwloc_config_prefix[tests/hwloc/linux/gather/Makefile] hwloc_config_prefix[tests/hwloc/x86/Makefile] + hwloc_config_prefix[tests/hwloc/x86+linux/Makefile] hwloc_config_prefix[tests/hwloc/xml/Makefile] hwloc_config_prefix[tests/hwloc/ports/Makefile] hwloc_config_prefix[tests/hwloc/rename/Makefile] @@ -445,6 +446,7 @@ int foo(void) { hwloc_config_prefix[tests/hwloc/linux/gather/test-gather-topology.sh] hwloc_config_prefix[tests/hwloc/linux/test-topology.sh] hwloc_config_prefix[tests/hwloc/x86/test-topology.sh] + hwloc_config_prefix[tests/hwloc/x86+linux/test-topology.sh] hwloc_config_prefix[tests/hwloc/xml/test-topology.sh] hwloc_config_prefix[tests/hwloc/wrapper.sh] hwloc_config_prefix[utils/hwloc/hwloc-compress-dir] @@ -459,6 +461,7 @@ int foo(void) { hwloc_config_prefix[utils/hwloc/test-hwloc-dump-hwdata/Makefile] hwloc_config_prefix[utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh] hwloc_config_prefix[utils/lstopo/test-lstopo.sh] + hwloc_config_prefix[utils/lstopo/test-lstopo-shmem.sh] hwloc_config_prefix[utils/netloc/infiniband/netloc_ib_gather_raw] hwloc_config_prefix[contrib/hwloc-ps.www/Makefile] hwloc_config_prefix[contrib/systemd/Makefile] @@ -469,27 +472,29 @@ int foo(void) { hwloc_config_prefix[tests/netloc/tests.sh] ) - AC_CONFIG_COMMANDS([chmoding-scripts], [ -chmod +x ]hwloc_config_prefix[tests/hwloc/linux/test-topology.sh \ - ]hwloc_config_prefix[tests/hwloc/x86/test-topology.sh \ - ]hwloc_config_prefix[tests/hwloc/xml/test-topology.sh \ - ]hwloc_config_prefix[tests/hwloc/linux/allowed/test-topology.sh \ - ]hwloc_config_prefix[tests/hwloc/linux/gather/test-gather-topology.sh \ - ]hwloc_config_prefix[tests/hwloc/wrapper.sh \ - ]hwloc_config_prefix[utils/hwloc/hwloc-compress-dir \ - ]hwloc_config_prefix[utils/hwloc/hwloc-gather-topology \ - 
]hwloc_config_prefix[utils/hwloc/test-hwloc-annotate.sh \ - ]hwloc_config_prefix[utils/hwloc/test-hwloc-calc.sh \ - ]hwloc_config_prefix[utils/hwloc/test-hwloc-compress-dir.sh \ - ]hwloc_config_prefix[utils/hwloc/test-hwloc-diffpatch.sh \ - ]hwloc_config_prefix[utils/hwloc/test-hwloc-distrib.sh \ - ]hwloc_config_prefix[utils/hwloc/test-hwloc-info.sh \ - ]hwloc_config_prefix[utils/hwloc/test-fake-plugin.sh \ - ]hwloc_config_prefix[utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh \ - ]hwloc_config_prefix[utils/lstopo/test-lstopo.sh \ - ]hwloc_config_prefix[utils/netloc/infiniband/netloc_ib_gather_raw \ - ]hwloc_config_prefix[contrib/windows/test-windows-version.sh \ - ]hwloc_config_prefix[tests/netloc/tests.sh]) + AC_CONFIG_COMMANDS([chmoding-scripts], [chmod +x] \ + hwloc_config_prefix[tests/hwloc/linux/test-topology.sh] \ + hwloc_config_prefix[tests/hwloc/x86/test-topology.sh] \ + hwloc_config_prefix[tests/hwloc/x86+linux/test-topology.sh] \ + hwloc_config_prefix[tests/hwloc/xml/test-topology.sh] \ + hwloc_config_prefix[tests/hwloc/linux/allowed/test-topology.sh] \ + hwloc_config_prefix[tests/hwloc/linux/gather/test-gather-topology.sh] \ + hwloc_config_prefix[tests/hwloc/wrapper.sh] \ + hwloc_config_prefix[utils/hwloc/hwloc-compress-dir] \ + hwloc_config_prefix[utils/hwloc/hwloc-gather-topology] \ + hwloc_config_prefix[utils/hwloc/test-hwloc-annotate.sh] \ + hwloc_config_prefix[utils/hwloc/test-hwloc-calc.sh] \ + hwloc_config_prefix[utils/hwloc/test-hwloc-compress-dir.sh] \ + hwloc_config_prefix[utils/hwloc/test-hwloc-diffpatch.sh] \ + hwloc_config_prefix[utils/hwloc/test-hwloc-distrib.sh] \ + hwloc_config_prefix[utils/hwloc/test-hwloc-info.sh] \ + hwloc_config_prefix[utils/hwloc/test-fake-plugin.sh] \ + hwloc_config_prefix[utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh] \ + hwloc_config_prefix[utils/lstopo/test-lstopo.sh] \ + hwloc_config_prefix[utils/lstopo/test-lstopo-shmem.sh] \ + 
hwloc_config_prefix[utils/netloc/infiniband/netloc_ib_gather_raw] \ + hwloc_config_prefix[contrib/windows/test-windows-version.sh] \ + hwloc_config_prefix[tests/netloc/tests.sh]) # These links are only needed in standalone mode. It would # be nice to m4 foreach this somehow, but whenever I tried diff --git a/contrib/ci.inria.fr/Jenkinsfile-basic b/contrib/ci.inria.fr/Jenkinsfile-basic index 9062750de6..9ba08236ae 100644 --- a/contrib/ci.inria.fr/Jenkinsfile-basic +++ b/contrib/ci.inria.fr/Jenkinsfile-basic @@ -26,7 +26,7 @@ pipeline { node('autotools') { checkout scm script { - gitBranch = sh (script: 'if test "$BRANCH_NAME"; then echo $BRANCH_NAME; else git branch | cut -c3-; fi', returnStdout: true).trim() + gitBranch = sh (script: 'if test "$BRANCH_NAME"; then echo $BRANCH_NAME; else git rev-parse --abbrev-ref HEAD; fi', returnStdout: true).trim() } sh 'contrib/ci.inria.fr/job-0-tarball.sh '+gitBranch script { diff --git a/contrib/ci.inria.fr/Jenkinsfile-extended b/contrib/ci.inria.fr/Jenkinsfile-extended index 2e5fab78d1..ea20b854dc 100644 --- a/contrib/ci.inria.fr/Jenkinsfile-extended +++ b/contrib/ci.inria.fr/Jenkinsfile-extended @@ -6,7 +6,7 @@ // + running Unix Debug checks // + building MinGW Windows zipballs // + running Embedded checks -// + running Sonarscanner analysis +// + running Coverity and Sonarscanner analysis // // The GIT checkout must use the remote branch name as the checkout local // branch name so that tarball names contain the branch name. 
@@ -36,7 +36,7 @@ pipeline { checkout scm script { gitRepoURL = sh (script: 'git config --get remote.origin.url', returnStdout: true).trim() - gitBranch = sh (script: 'if test "$BRANCH_NAME"; then echo $BRANCH_NAME; else git branch | cut -c3-; fi', returnStdout: true).trim() + gitBranch = sh (script: 'if test "$BRANCH_NAME"; then echo $BRANCH_NAME; else git rev-parse --abbrev-ref HEAD; fi', returnStdout: true).trim() } sh 'contrib/ci.inria.fr/job-0-tarball.sh '+gitBranch script { @@ -47,6 +47,7 @@ pipeline { dir('contrib/ci.inria.fr') { stash includes: "job-3-debug.sh", name: 'script-unix-debug' stash includes: "job-3-embedded.sh", name: 'script-embedded' + stash includes: "job-3-coverity.sh", name: 'script-coverity' stash includes: "job-3-sonarscanner.sh", name: 'script-sonarscanner' stash includes: "job-3-mingw.*", name: 'scripts-mingw' stash includes: "job-3-cygwin.*", name: 'scripts-cygwin' @@ -70,7 +71,10 @@ pipeline { if (env.NO_MINGW != 'true') { listOfNodeNames.push('MinGW') } - if (env.NO_SONAR != 'true') { + if (env.NO_COVERITY != 'true') { + listOfNodeNames.push('Coverity') + } + if (env.NO_SONARQUBE != 'true') { listOfNodeNames.push('SonarQube Scanner') } if (env.NO_EMBEDDED != 'true') { @@ -103,6 +107,16 @@ pipeline { deleteDir() } } + } else if (it == 'Coverity') { + node('coverity') { + dir('check-coverity') { + unstash 'tarballgz' + unstash 'script-coverity' + sh 'chmod 755 job-3-coverity.sh && ./job-3-coverity.sh '+gitRepoURL+' '+gitBranch+' '+tarballgz + if (env.KEEP_WORKING_DIRECTORY != 'true') + deleteDir() + } + } } else if (it == 'SonarQube Scanner') { node('sonarscanner') { dir('check-sonarscanner') { diff --git a/contrib/ci.inria.fr/job-1-visualstudio.bat b/contrib/ci.inria.fr/job-1-visualstudio.bat index 7f97e9f66a..6b9246d8f9 100644 --- a/contrib/ci.inria.fr/job-1-visualstudio.bat +++ b/contrib/ci.inria.fr/job-1-visualstudio.bat @@ -1,5 +1,5 @@ REM -REM Copyright © 2012-2018 Inria. All rights reserved. +REM Copyright © 2012-2019 Inria. 
All rights reserved. REM See COPYING in top-level directory. REM @@ -13,7 +13,7 @@ if %errorlevel% neq 0 exit /b %errorlevel% cd %TARBALL:~0,-7%\contrib\windows if %errorlevel% neq 0 exit /b %errorlevel% -%MSBUILD_PATH%\MSBuild hwloc.sln /p:Configuration=Release /p:Platform=x64 +%MSBUILD_PATH%\MSBuild hwloc.sln /p:Configuration=Release /p:Platform=x64 /p:PlatformToolset=v110 if %errorlevel% neq 0 exit /b %errorlevel% x64\Release\lstopo-no-graphics.exe diff --git a/contrib/ci.inria.fr/job-3-coverity.sh b/contrib/ci.inria.fr/job-3-coverity.sh new file mode 100755 index 0000000000..09b0d2719f --- /dev/null +++ b/contrib/ci.inria.fr/job-3-coverity.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# +# Copyright © 2012-2019 Inria. All rights reserved. +# See COPYING in top-level directory. +# + +echo "############################" +echo "Running on:" +uname -a +echo "Tarball: $3" +echo "############################" + +set -e +set -x + +git_repo_url="$1" +hwloc_branch="$2" +tarball="$3" + +if test -z "$git_repo_url" || test -z "$hwloc_branch"; then + echo "Need repo URL and branch name as arguments." + exit 1 +fi + +# environment variables +test -f $HOME/.ciprofile && . $HOME/.ciprofile + +# check that the repo is the official one +# check that this is master +if test x$hwloc_branch != xmaster -o x$git_repo_url != xhttps://github.com/open-mpi/hwloc.git; then + if test x$FORCE_COVERITY = xtrue; then + echo "Forcing coverity on non-master-branch or non-official repository." + else + echo "Ignoring non-master-branch or non-official repository." 
+ exit 0 + fi +fi + +# extract the tarball +basename=$(basename $tarball .tar.gz) +test -d $basename && chmod -R u+rwX $basename && rm -rf $basename +tar xfz $tarball +rm $tarball +cd $basename + +# ignore clock problems +touch configure + +# configure things +EMAIL=brice.goglin@labri.fr +VERSION=$basename +COVDIR=cov-int +COVBALL=myproject.tgz + +# run +./configure +cov-build --dir ${COVDIR} make all +cov-build --dir ${COVDIR} make check +tar cfvz ${COVBALL} ${COVDIR} +curl --form file=@${COVBALL} \ + --form "token=<${COVERITY_TOKEN_FILE}" \ + --form email=${EMAIL} \ + --form version=${VERSION} \ + --form description=manual \ + https://scan.coverity.com/builds?project=hwloc + +exit 0 diff --git a/contrib/ci.inria.fr/job-3-sonarscanner.sh b/contrib/ci.inria.fr/job-3-sonarscanner.sh index cd95323b3a..5c24850b87 100755 --- a/contrib/ci.inria.fr/job-3-sonarscanner.sh +++ b/contrib/ci.inria.fr/job-3-sonarscanner.sh @@ -7,7 +7,7 @@ echo "############################" echo "Running on:" uname -a -echo "Tarball: $1" +echo "Tarball: $3" echo "############################" set -e @@ -28,14 +28,14 @@ test -f $HOME/.ciprofile && . $HOME/.ciprofile # check that this is either master or vX.Y if test x$hwloc_branch != xmaster; then if test x$(echo "x${hwloc_branch}x" | sed -r -e 's/xv[0-9]+\.[0-9]+x//') != x; then - echo "Sending non-master and non-stable branch output to `tmp` branch on sonarqube server." + echo "Sending non-master and non-stable branch output to 'tmp' branch on sonarqube server." hwloc_branch=tmp fi fi # check that the repo is the official one if test x$git_repo_url != xhttps://github.com/open-mpi/hwloc.git; then - if test x$FORCE_SONAR_SCANNER = xtrue; then + if test x$FORCE_SONARQUBE = xtrue; then echo "Sending non-official repository output to 'tmp' branch on sonarqube server." 
hwloc_branch=tmp else @@ -133,7 +133,7 @@ sed -e '/#define HWLOC_HAVE_ATTRIBUTE/d' -i include/private/autogen/config.h # Create the config for sonar-scanner cat > sonar-project.properties << EOF -sonar.host.url=https://sonarqube.bordeaux.inria.fr/sonarqube +sonar.host.url=https://sonarqube.inria.fr/sonarqube sonar.login=$(cat ~/.sonarqube-hwloc-token) sonar.links.homepage=https://www.open-mpi.org/projects/hwloc/ sonar.links.ci=https://ci.inria.fr/hwloc/ diff --git a/contrib/completion/hwloc-completion.bash b/contrib/completion/hwloc-completion.bash index 841176d0b4..fe39813579 100644 --- a/contrib/completion/hwloc-completion.bash +++ b/contrib/completion/hwloc-completion.bash @@ -450,6 +450,7 @@ complete -F _hwloc-ps hwloc-ps _hwloc-gather-cpuid(){ local OPTIONS=(-c + -s --silent -h --help ) local cur=${COMP_WORDS[COMP_CWORD]} @@ -471,6 +472,7 @@ complete -F _hwloc-gather-cpuid hwloc-gather-cpuid _hwloc-gather-topology(){ local OPTIONS=(--io --dmi + --no-cpuid --keep -h --help ) diff --git a/contrib/dist/publish_doc b/contrib/dist/publish_doc index 19ca5f9c85..de39070884 100755 --- a/contrib/dist/publish_doc +++ b/contrib/dist/publish_doc @@ -14,11 +14,11 @@ fi EXAMPLE=v2.0.0 if test -d "$dst/projects/hwloc/doc/$EXAMPLE/"; then - dst="$dst/projects/hwloc/doc/" + dst=`realpath "$dst/projects/hwloc/doc/"` else if test -d "$dst/hwloc/doc/$EXAMPLE/"; then - dst="$dst/hwloc/doc/" + dst=`realpath "$dst/hwloc/doc/"` else if test -d "$dst/doc/$EXAMPLE/"; then - dst="$dst/doc/" + dst=`realpath "$dst/doc/"` else if test -d "$dst/$EXAMPLE/"; then : else diff --git a/contrib/hwloc-ps.www/package.json b/contrib/hwloc-ps.www/package.json index d97a6fd8e6..04e2b4a68d 100644 --- a/contrib/hwloc-ps.www/package.json +++ b/contrib/hwloc-ps.www/package.json @@ -12,6 +12,6 @@ "license": "ISC", "dependencies": { "express": "^4.16.4", - "open": "0.0.5" + "open": ">=6.0.0" } } diff --git a/contrib/windows/README b/contrib/windows/README index dc1da8fa4c..b3a23ddd57 100644 --- 
a/contrib/windows/README +++ b/contrib/windows/README @@ -1,33 +1,19 @@ This Visual Studio support is experimental. -Thanks to Eloi Gaudry for contributing the first draft of files. +Thanks to Eloi Gaudry for contributing most of these files. +This may be directly opened in VS, or built from the command-line with: + %MSBUILD_PATH%\MSBuild hwloc.sln /p:Configuration=Release /p:Platform=x64 -No PlatformToolset is specified, so that the default target is always -used during build. -That means your build may not be compatible with older systems. -Enforce a specific toolset before building if needed. +If the specified platform toolset is too recent for your VS installation: + "The build tools for v142 (Platform Toolset = 'v142') cannot be found." +You may force the build using your old one by adding + /p:PlatformToolset=v110 +See contrib/ci.inria.fr/job-1-visualstudio.bat for an example. -hwloc-compress-dir not built because needs work. + +hwloc-compress-dir is not built because it needs work. hwloc-gather-topology is Linux specific. -hwloc-ps is not built because it does nothing on Windows anyway. -Possible code proposed by Eloi Gaudry: - // Get the process list snapshot - HANDLE hProcessSnapShot = CreateToolhelp32Snapshot(TH32CS_SNAPALL, 0); - // Initialize the process entry structure - PROCESSENTRY32 ProcessEntry = { 0 } ; - ProcessEntry.dwSize = sizeof(ProcessEntry); - // Get the first process info - if (Process32First( hProcessSnapShot, &ProcessEntry)) { - do { - HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION, FALSE, ProcessEntry.th32ProcessID) ; - if (hProcess) { - std::cerr << ProcessEntry.th32ProcessID << "(" << ProcessEntry.szExeFile << ")" << std::endl ; - } - // check the PROCESSENTRY32 for other members. - } while (Process32Next(hProcessSnapShot, &ProcessEntry)); - // Close the handle - CloseHandle( hProcessSnapShot ) ; - } +hwloc-ps is not built because it does nothing on Windows anyway (see #367). 
diff --git a/contrib/windows/hwloc-annotate.vcxproj b/contrib/windows/hwloc-annotate.vcxproj index b4dd4d7d8a..f8372d599e 100644 --- a/contrib/windows/hwloc-annotate.vcxproj +++ b/contrib/windows/hwloc-annotate.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB8F} hwloc-bind Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -83,6 +87,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/hwloc-bind.vcxproj b/contrib/windows/hwloc-bind.vcxproj index be130a8f46..c38bccdc8e 100644 --- a/contrib/windows/hwloc-bind.vcxproj +++ b/contrib/windows/hwloc-bind.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB91} hwloc-bind Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -84,6 +88,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/hwloc-calc.vcxproj b/contrib/windows/hwloc-calc.vcxproj index 38cc4c5151..c1aa6e57ba 100644 --- a/contrib/windows/hwloc-calc.vcxproj +++ b/contrib/windows/hwloc-calc.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB92} hwloc-calc Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -84,6 +88,12 @@ + + + 
{9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/hwloc-diff.vcxproj b/contrib/windows/hwloc-diff.vcxproj index fb2e0a0d92..41728a3799 100644 --- a/contrib/windows/hwloc-diff.vcxproj +++ b/contrib/windows/hwloc-diff.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB93} hwloc-bind Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -82,6 +86,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/hwloc-distrib.vcxproj b/contrib/windows/hwloc-distrib.vcxproj index d4161f573a..fe6a64fa28 100644 --- a/contrib/windows/hwloc-distrib.vcxproj +++ b/contrib/windows/hwloc-distrib.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB95} hwloc-bind Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -83,6 +87,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/hwloc-gather-cpuid.vcxproj b/contrib/windows/hwloc-gather-cpuid.vcxproj index cca9c6ca7a..6a8477062f 100644 --- a/contrib/windows/hwloc-gather-cpuid.vcxproj +++ b/contrib/windows/hwloc-gather-cpuid.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB9A} hwloc-bind Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + 
$(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -72,6 +76,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/hwloc-info.vcxproj b/contrib/windows/hwloc-info.vcxproj index 3e03b46f25..90e1bbb15c 100644 --- a/contrib/windows/hwloc-info.vcxproj +++ b/contrib/windows/hwloc-info.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB96} hwloc-info Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -84,6 +88,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/hwloc-patch.vcxproj b/contrib/windows/hwloc-patch.vcxproj index f21ea7a397..0982a22021 100644 --- a/contrib/windows/hwloc-patch.vcxproj +++ b/contrib/windows/hwloc-patch.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB97} hwloc-bind Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -82,6 +86,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/hwloc.sln b/contrib/windows/hwloc.sln index 27f101780e..93b4ebba3b 100644 --- a/contrib/windows/hwloc.sln +++ b/contrib/windows/hwloc.sln @@ -1,62 +1,31 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Express 2012 for Windows Desktop +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29306.81 +MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libhwloc", "libhwloc.vcxproj", 
"{9DE76C6D-4773-4766-9F93-69C56166CB8D}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lstopo-no-graphics", "lstopo-no-graphics.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB8E}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-annotate", "hwloc-annotate.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB8F}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-bind", "hwloc-bind.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB91}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-calc", "hwloc-calc.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB92}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-diff", "hwloc-diff.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB93}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-distrib", "hwloc-distrib.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB95}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-info", "hwloc-info.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB96}" - 
ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-patch", "hwloc-patch.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB97}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lstopo", "lstopo.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB98}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lstopo-win", "lstopo-win.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB99}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hwloc-gather-cpuid", "hwloc-gather-cpuid.vcxproj", "{9DE76C6D-4773-4766-9F93-69C56166CB9A}" - ProjectSection(ProjectDependencies) = postProject - {9DE76C6D-4773-4766-9F93-69C56166CB8D} = {9DE76C6D-4773-4766-9F93-69C56166CB8D} - EndProjectSection EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -107,4 +76,7 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {A91DB611-31B4-4CBC-A866-C9E529108BD0} + EndGlobalSection EndGlobal diff --git a/contrib/windows/hwloc_config.h b/contrib/windows/hwloc_config.h index 36669de556..06963b3640 100644 --- a/contrib/windows/hwloc_config.h +++ b/contrib/windows/hwloc_config.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. 
All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -11,9 +11,9 @@ #ifndef HWLOC_CONFIG_H #define HWLOC_CONFIG_H -#define HWLOC_VERSION "2.1.0" +#define HWLOC_VERSION "2.2.0" #define HWLOC_VERSION_MAJOR 2 -#define HWLOC_VERSION_MINOR 1 +#define HWLOC_VERSION_MINOR 2 #define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_GREEK "" diff --git a/contrib/windows/libhwloc.vcxproj b/contrib/windows/libhwloc.vcxproj index 8bedb46341..b50c12996c 100644 --- a/contrib/windows/libhwloc.vcxproj +++ b/contrib/windows/libhwloc.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB8D} libhwloc Win32Proj + 10.0 @@ -28,27 +29,27 @@ DynamicLibrary true MultiByte - v110 + v142 StaticLibrary true MultiByte - v110 + v142 DynamicLibrary false true MultiByte - v110 + v142 StaticLibrary false true MultiByte - v110 + v142 @@ -70,28 +71,28 @@ false true $(ProjectName)-15 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ false true $(ProjectName)-15 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ false true $(ProjectName)-15 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ false true $(ProjectName)-15 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ @@ -200,6 +201,7 @@ + diff --git a/contrib/windows/libhwloc.vcxproj.filters b/contrib/windows/libhwloc.vcxproj.filters index d550d00482..8165997dea 100644 --- a/contrib/windows/libhwloc.vcxproj.filters +++ b/contrib/windows/libhwloc.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -30,6 +30,9 @@ Source Files + + Source Files + Source 
Files diff --git a/contrib/windows/lstopo-no-graphics.vcxproj b/contrib/windows/lstopo-no-graphics.vcxproj index a9c8dfa439..ac8ec9997e 100644 --- a/contrib/windows/lstopo-no-graphics.vcxproj +++ b/contrib/windows/lstopo-no-graphics.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB8E} lstopo-no-graphics Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -92,6 +96,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/lstopo-win.vcxproj b/contrib/windows/lstopo-win.vcxproj index 823c5a8e10..b284e72b95 100644 --- a/contrib/windows/lstopo-win.vcxproj +++ b/contrib/windows/lstopo-win.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB99} lstopo-no-graphics Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -94,6 +98,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/contrib/windows/lstopo.vcxproj b/contrib/windows/lstopo.vcxproj index 3027bec40e..f70f8ff194 100644 --- a/contrib/windows/lstopo.vcxproj +++ b/contrib/windows/lstopo.vcxproj @@ -4,6 +4,7 @@ {9DE76C6D-4773-4766-9F93-69C56166CB98} lstopo-no-graphics Win32Proj + 10.0 @@ -17,7 +18,7 @@ false true MultiByte - v110 + v142 @@ -30,9 +31,12 @@ false true $(ProjectName)-12 - $(SolutionDir)$(Platform)\$(Configuration)\ + $(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\ $(SolutionDir)$(Platform)\$(Configuration)\ + + $(Platform)\$(Configuration)\$(ProjectName)\ + Level3 @@ -93,6 
+97,12 @@ + + + {9DE76C6D-4773-4766-9F93-69C56166CB8D} + false + + diff --git a/doc/Makefile.am b/doc/Makefile.am index 88bad10301..9ece589dd1 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,4 +1,4 @@ -# Copyright © 2009-2018 Inria. All rights reserved. +# Copyright © 2009-2019 Inria. All rights reserved. # Copyright © 2009-2013 Université Bordeaux # Copyright © 2009-2016 Cisco Systems, Inc. All rights reserved. # See COPYING in top-level directory. @@ -80,7 +80,6 @@ dox_inputs = $(DOX_CONFIG) \ $(hwloc_include_dir)/hwloc/cudart.h \ $(hwloc_include_dir)/hwloc/nvml.h \ $(hwloc_include_dir)/hwloc/gl.h \ - $(hwloc_include_dir)/hwloc/intel-mic.h \ $(hwloc_include_dir)/hwloc/openfabrics-verbs.h \ $(srcdir)/netloc.doxy \ $(hwloc_include_dir)/netloc.h @@ -642,6 +641,8 @@ man3_helper_distances_DATA = \ $(DOX_MAN_DIR)/man3/hwloc_distances_get.3 \ $(DOX_MAN_DIR)/man3/hwloc_distances_get_by_depth.3 \ $(DOX_MAN_DIR)/man3/hwloc_distances_get_by_type.3 \ + $(DOX_MAN_DIR)/man3/hwloc_distances_get_by_name.3 \ + $(DOX_MAN_DIR)/man3/hwloc_distances_get_name.3 \ $(DOX_MAN_DIR)/man3/hwloc_distances_release.3 \ $(DOX_MAN_DIR)/man3/hwlocality_distances_consult.3 \ $(DOX_MAN_DIR)/man3/hwloc_distances_obj_index.3 \ @@ -653,7 +654,8 @@ man3_helper_distances_DATA = \ $(DOX_MAN_DIR)/man3/hwloc_distances_add.3 \ $(DOX_MAN_DIR)/man3/hwloc_distances_remove.3 \ $(DOX_MAN_DIR)/man3/hwloc_distances_remove_by_depth.3 \ - $(DOX_MAN_DIR)/man3/hwloc_distances_remove_by_type.3 + $(DOX_MAN_DIR)/man3/hwloc_distances_remove_by_type.3 \ + $(DOX_MAN_DIR)/man3/hwloc_distances_release_remove.3 man3_helper_advanced_iodir = $(man3dir) man3_helper_advanced_io_DATA = \ @@ -713,10 +715,7 @@ man3_cuda_DATA = \ $(DOX_MAN_DIR)/man3/hwlocality_gl.3 \ $(DOX_MAN_DIR)/man3/hwloc_gl_get_display_osdev_by_port_device.3 \ $(DOX_MAN_DIR)/man3/hwloc_gl_get_display_osdev_by_name.3 \ - $(DOX_MAN_DIR)/man3/hwloc_gl_get_display_by_osdev.3 \ - $(DOX_MAN_DIR)/man3/hwlocality_intel_mic.3 \ - 
$(DOX_MAN_DIR)/man3/hwloc_intel_mic_get_device_cpuset.3 \ - $(DOX_MAN_DIR)/man3/hwloc_intel_mic_get_device_osdev_by_index.3 + $(DOX_MAN_DIR)/man3/hwloc_gl_get_display_by_osdev.3 man3_glibc_scheddir = $(man3dir) man3_glibc_sched_DATA = \ diff --git a/doc/doxygen-config.cfg.in b/doc/doxygen-config.cfg.in index 983e358fe5..6bf199f398 100644 --- a/doc/doxygen-config.cfg.in +++ b/doc/doxygen-config.cfg.in @@ -1,4 +1,4 @@ -# Copyright © 2010-2017 Inria. All rights reserved. +# Copyright © 2010-2019 Inria. All rights reserved. # Copyright © 2009 Cisco Systems, Inc. All rights reserved. # See COPYING in top-level directory. @@ -23,7 +23,6 @@ INPUT = \ @top_srcdir@/include/hwloc/cudart.h \ @top_srcdir@/include/hwloc/nvml.h \ @top_srcdir@/include/hwloc/gl.h \ - @top_srcdir@/include/hwloc/intel-mic.h \ @top_srcdir@/include/hwloc/openfabrics-verbs.h \ @top_srcdir@/include/hwloc/diff.h \ @top_srcdir@/include/hwloc/shmem.h \ diff --git a/doc/examples/gpu.c b/doc/examples/gpu.c index 81981824ba..593ab47e8f 100644 --- a/doc/examples/gpu.c +++ b/doc/examples/gpu.c @@ -3,7 +3,7 @@ * - getting CUDA and OpenCL attributes * - displaying the locality of the GPU * - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2011,2017 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -98,7 +98,7 @@ int main(void) char name[16]; hwloc_obj_type_snprintf(name, sizeof(name), obj, 0); hwloc_bitmap_asprintf(&cpuset_string, obj->cpuset); - printf("Location: %s P#%d\n", name, obj->os_index); + printf("Location: %s P#%u\n", name, obj->os_index); printf("Cpuset: %s\n", cpuset_string); } printf("\n"); diff --git a/doc/hwloc.doxy b/doc/hwloc.doxy index c360286aec..15b5f1b0b7 100644 --- a/doc/hwloc.doxy +++ b/doc/hwloc.doxy @@ -31,7 +31,7 @@ for links to more sections about hwloc concepts. 
hwloc provides command line tools and a C API to obtain the hierarchical map of key computing elements within a node, such as: NUMA memory -nodes, shared caches, processor packages, processor cores, +nodes, shared caches, processor packages, dies and cores, processing units (logical processors or "threads") and even I/O devices. hwloc also gathers various attributes such as @@ -515,7 +515,7 @@ Make sure to have had a look at those too! detailed in the ::hwloc_obj_type_t enumeration. There are four kinds of Objects: Memory (NUMA nodes and Memory-side caches), I/O (Bridges, PCI and OS devices), - Misc, and Normal (everything else, including Machine, Package, Core, PU, CPU Caches, etc.). + Misc, and Normal (everything else, including Machine, Package, Die, Core, PU, CPU Caches, etc.). Normal and Memory objects have (non-NULL) CPU sets and nodesets, while I/O and Misc don't. Objects are topologically sorted by locality (CPU and node sets) @@ -724,7 +724,7 @@ It is attached as a Memory child (in green) and has a virtual depth (nega It could also have siblings if there were multiple local NUMA nodes, or cousins if other NUMA nodes were attached somewhere else in the machine. -I/O or Misc object could be attached in a similar manner. +I/O or Misc objects could be attached in a similar manner. @@ -907,6 +907,9 @@ hwloc-gather-cpuid is a x86-specific tool that dumps the result of CPUID instructions on the current machine into a directory. +The output of hwloc-gather-cpuid is included in the tarball +saved by hwloc-gather-topology when running on Linux/x86. + These files may be used later (possibly offline) for simulating or debugging a machine without actually running on it. @@ -999,6 +1002,17 @@ following environment variables. actual displaying of these error messages. +
HWLOC_USE_NUMA_DISTANCES=7
+
enables or disables the use of NUMA distances. + NUMA distances and memory target/initiator information may be used + to improve the locality of NUMA nodes, especially CPU-less nodes. + Bits in the value of this environment variable enable different features: + Bit 0 enables the gathering of NUMA distances from the operating system. + Bit 1 further enables the use of NUMA distances to improve the + locality of CPU-less nodes. + Bit 2 enables the use of target/initiator information. +
+
HWLOC_GROUPING=1
enables or disables objects grouping based on distances. By default, hwloc uses distance matrices between objects (either read @@ -1088,13 +1102,21 @@ following environment variables. Setting this environment variable to 1 will expose it as a proper Memory-side cache.
+
HWLOC_ANNOTATE_GLOBAL_COMPONENTS=0
+
Allow components to annotate the topology even if they are + usually excluded by global components by default. + Setting this variable to 1 and also setting HWLOC_COMPONENTS=xml,pci,stop + enables the addition of PCI vendor and model info attributes to an XML topology + that was generated without those names (if pciaccess was missing). +
+
HWLOC_FSROOT=/path/to/linux/filesystem-root/
switches to reading the topology from the specified Linux filesystem root instead of the main file-system root. This directory may have been saved previously from another machine with hwloc-gather-topology.
- One should likely also set HWLOC_COMPONENTS=linux,linuxio,stop + One should likely also set HWLOC_COMPONENTS=linux,stop so that non-Linux backends are disabled (the -i option of command-line tools takes care of both).
@@ -1137,7 +1159,8 @@ following environment variables.
forces a list of components to enable or disable. Enable or disable the given comma-separated list of components (if they do not conflict with each other). Component names prefixed with - - are disabled. + - are disabled (a single phase may also be disabled). + Once the end of the list is reached, hwloc falls back to enabling the remaining components (sorted by priority) that do not conflict with the already enabled ones, and unless explicitly disabled in the @@ -1447,9 +1470,6 @@ components when I/O discovery is enabled and supported. (OpenCL component)
  • cuda0 for the first NVIDIA CUDA device (CUDA component, using the NVIDIA CUDA Library)
  • -
  • mic0 for the first Intel Xeon Phi (MIC) coprocessor - (Linux component)
  • -
  • DMA engine channel (::HWLOC_OBJ_OSDEV_DMA)
      @@ -1865,23 +1885,6 @@ and SectorSize (in bytes). and size of the shared memory in each multiprocessor of a CUDA device. Sizes are in kB.
  • -
    MICSerialNumber (MIC coprocessor OS device)
    -
    - The serial number of an Intel Xeon Phi (MIC) coprocessor. - hwloc may run either inside the coprocessor itself, or on the host processor. - That attribute is set in both cases, so that the exact same coprocessor may be - identified from both point of views, even if there are multiple nodes with - multiple MICs. - When running hwloc on the host, each hwloc OS device object that - corresponds to a Xeon Phi gets such an attribute. - When running hwloc inside a Xeon Phi coprocessor, the root of the topology - (Machine object) gets this attribute. -
    -
    MICFamily, MICSKU, MICActiveCores, MICMemorySize (MIC coprocessor OS device)
    -
    The family, SKU (model), - number of active cores, and memory size (in kB) - of an Intel Xeon Phi (MIC) coprocessor. -
    Address, Port (Network interface OS devices)
    The MAC address and the port number of a software network interface, such as eth4 on Linux. @@ -2039,7 +2042,8 @@ libxml2 support as a plugin. \section xml_errors XML import error management Importing XML files can fail at least because of file access errors, -invalid XML syntax or non-hwloc-valid XML contents. +invalid XML syntax, non-hwloc-valid XML contents, +or incompatibilities between hwloc releases (see \ref faq_version_xml). Both backend cannot detect all these errors when the input XML file or buffer is selected (when hwloc_topology_set_xml() or @@ -2205,7 +2209,7 @@ Package:1 L3Cache:1 L2Cache:2 L1dCache:1 L1iCache:1 Core:1 PU:2 \endverbatim The exported string may be passed back to hwloc for recreating -another similar topology. +another similar topology (see also \ref faq_version_synthetic). The entire tree will be similar, but some attributes such as the processor model will be missing. @@ -2277,14 +2281,6 @@ enabled for the current topology. information (if I/O device discovery is enabled).
    -
    Intel Xeon Phi (MIC)
    -
    - hwloc/intel-mic.h helps interoperability with Intel Xeon Phi (MIC) - coprocessors by returning the list of processors near these devices. - It may also return the corresponding OS device hwloc object for further - information (if I/O device discovery is enabled). -
    -
    OpenCL
    hwloc/opencl.h enables interoperability with the OpenCL interface. @@ -2472,6 +2468,8 @@ Most of them, including most native OS components, do nothing unless the topology is still empty. Some others, such as x86 and pci, can complete and annotate what other backends found earlier. +Discovery is performed by phases: CPUs are first discovered, +then memory is attached, then PCI, etc. Default priorities ensure that clever components are invoked first. Native operating system components have higher priorities, @@ -2528,8 +2526,9 @@ It is possible to prevent some components from being loaded by prefixing their name with - in the list. For instance x86,-pci will load the x86 component, then let hwloc load all the usual components except pci. +A single component phase may also be blacklisted, for instance with -linux:io. hwloc_topology_set_components() may also be used inside the program -to prevent the loading of a specific component for the target topology. +to prevent the loading of a specific component (or phases) for the target topology. It is possible to prevent all remaining components from being loaded by placing stop in the environment variable. @@ -2567,60 +2566,6 @@ Also note that plugins should carefully be enabled and used when embedding hwloc in another project, see \ref embed for details. -\htmlonly -
    -\endhtmlonly -\section plugins_adding Adding new discovery components and plugins - -The types and functions cited below are declared in the hwloc/plugins.h header. -Components are supposed to only use hwloc public headers (hwloc.h and -anything under the include/hwloc subdirectory) and nothing from the -include/private subdirectory in the source tree. - -\subsection plugins_disc_basic Basics of discovery components - -Each discovery component is defined by a ::hwloc_disc_component -structure which contains an instantiate() callback. -This function is invoked when this component is actually used by a topology. -It fills a new ::hwloc_backend structure that usually contains -a discover() callback taking care of the actual topology discovery. - -\note If two discovery components have the same name, only the highest -priority one is actually made available. -This offers a way for third-party plugins to override existing components. - -\subsection plugins_disc_register Registering a new discovery component - -Registering components to the hwloc core relies on a ::hwloc_component structure. -Its data field points to the previously defined ::hwloc_disc_component -structure while its type should be ::HWLOC_COMPONENT_TYPE_DISC. -This structure should be named hwloc_<name>_component. - -The configure script should be modified to add <name> -to its hwloc_components shell variable so that the component -is actually available. - -\note The symbol name of the ::hwloc_component structure -is independent of the name of the discovery component mentioned -in the previous section. - -When the component is statically built inside the hwloc library, -the symbol hwloc_<name>_component is added by configure -to the src/static-components.h. -The core then registers all components listed in this file. - -If the new component may be built as a plugin, the configure script -should also define the shell variable -hwloc_<name>_component_maybeplugin=1. 
-When the configure script actually enables the component as a plugin, -it will set the variable hwloc_<name>_component -to plugin. -The build system may then use this variable to change the way the component is built. -It should create a hwloc_<name>.so shared object. -All these files are loaded in alphabetic order, and the components -they contain are registered to the hwloc core. - - \htmlonly
    \endhtmlonly @@ -2633,7 +2578,14 @@ environment variable (see \ref envvar).
    -
    aix, darwin, freebsd, hpux, linux, netbsd, solaris, windows
    +
    linux
    +
    + The official component for discovering CPU, memory and I/O devices on Linux. + It discovers PCI devices without the help of external libraries such as libpciaccess, + but requires the pci component for adding vendor/device names to PCI objects. + It also discovers many kinds of Linux-specific OS devices. +
    +
    aix, darwin, freebsd, hpux, netbsd, solaris, windows
    Each officially supported operating system has its own native component, which is statically built when supported, and which is used by default. @@ -2660,16 +2612,10 @@ environment variable (see \ref envvar).
    pci
    PCI object discovery uses the external pciaccess library (aka libpciaccess); - see \ref iodevices. + see \ref iodevices. It may also annotate existing PCI devices with vendor + and device names. It may be built as a plugin.
    -
    linuxio (former linuxpci)
    -
    - This component can probe I/O devices on Linux. - It discovers PCI devices without the help of external libraries such as libpciaccess. - Its priority is lower than the pci component because it misses device names. - It also discovers many kinds of Linux-specific OS devices. -
    opencl
    The OpenCL component creates co-processor OS device objects such as @@ -2842,7 +2788,7 @@ HWLOC_SETUP_CORE must be invoked if using the m4 macros): hwloc's types and public symbols with "foo_"; meaning that function hwloc_init() becomes foo_hwloc_init(). Enum values are prefixed with an upper-case translation if the prefix supplied; - HWLOC_OBJ_CORE becomes FOO_HWLOC_OBJ_CORE. This is recommended + HWLOC_OBJ_CORE becomes FOO_hwloc_OBJ_CORE. This is recommended behavior if you are including hwloc in middleware -- it is possible that your software will be combined with other software that links to another copy of hwloc. If both uses of hwloc utilize different @@ -3094,7 +3040,7 @@ hwloc currently uses Groups for the following reasons:
  • Distance-based groups made of close objects.
  • AMD Bulldozer dual-core compute units (subtype is ComputeUnit, in the x86 backend), but these objects are usually merged with the L2 caches.
  • -
  • Intel extended topology enumeration unknown levels (in the x86 backend).
  • +
  • Intel Extended Topology Enumeration levels (in the x86 backend).
  • Windows processor groups (unless they contain a single NUMA node, or a single Package, etc.).
  • IBM S/390 "Books" on Linux (subtype is Book).
  • AIX unknown hierarchy levels.
  • @@ -3106,9 +3052,12 @@ It means that a Group containing a single child is merged into that child. And a Group is merged into its parent if it is its only child. For instance a Windows processor group containing a single NUMA node -would be merged with that NUMA node since it already contains the +would be merged with that NUMA node since it already contains the relevant hierarchy information. +When inserting a custom Group with hwloc_topology_insert_group_object(), +this merging may be disabled by setting its dont_merge attribute. + \subsection faq_asymmetric What happens if my topology is asymmetric? @@ -3503,80 +3452,6 @@ You should pass the following command-line option to Valgrind to use it: \endverbatim -\subsection faq_upgrade How do I handle ABI breaks and API upgrades? - -The hwloc interface is extended with every new major release. -Any application using the hwloc API should be prepared to check at -compile-time whether some features are available in the currently -installed hwloc distribution. - -For instance, to check whether the hwloc version is at least 2.0, you should use: -\verbatim -#include -#if HWLOC_API_VERSION >= 0x00020000 -... -#endif -\endverbatim - -To check for the API of release X.Y.Z at build time, -you may compare ::HWLOC_API_VERSION with (X<<16)+(Y<<8)+Z. - -For supporting older releases that do not have HWLOC_OBJ_NUMANODE -and HWLOC_OBJ_PACKAGE yet, you may use: - -\verbatim -#include -#if HWLOC_API_VERSION < 0x00010b00 -#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE -#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET -#endif -\endverbatim - -The hwloc interface was deeply modified in release 2.0 -to fix several issues of the 1.x interface -(see \ref upgrade_to_api_2x and the NEWS file in the source directory for details). -The ABI was broken, which means -applications must be recompiled against the new 2.0 interface.
- -To check that you are not mixing old/recent headers with a recent/old runtime library, -check the major revision number in the API version: -\verbatim -#include - unsigned version = hwloc_get_api_version(); - if ((version >> 16) != (HWLOC_API_VERSION >> 16)) { - fprintf(stderr, - "%s compiled for hwloc API 0x%x but running on library API 0x%x.\n" - "You may need to point LD_LIBRARY_PATH to the right hwloc library.\n" - "Aborting since the new ABI is not backward compatible.\n", - callname, HWLOC_API_VERSION, version); - exit(EXIT_FAILURE); - } -\endverbatim -To specifically detect v2.0 issues: -\verbatim -#include -#if HWLOC_API_VERSION >= 0x00020000 - /* headers are recent */ - if (hwloc_get_api_version() < 0x20000) - ... error out, the hwloc runtime library is older than 2.0 ... -#else - /* headers are pre-2.0 */ - if (hwloc_get_api_version() >= 0x20000) - ... error out, the hwloc runtime library is more recent than 2.0 ... -#endif -\endverbatim - -You should not try to remain compatible with very old releases such as -1.1.x or earlier because ::HWLOC_API_VERSION was added in 1.0.0 -and hwloc_get_api_version() came only in 1.1.1. -Also do not use the old cpuset API since it was deprecated and superseded -by the bitmap API in 1.1, and later removed in 1.5. - -If you ever need to look at the library version instead of the API version, -you may want to use HWLOC_VERSION instead. -Two stable releases of the same series usually have the same ::HWLOC_API_VERSION -even if their HWLOC_VERSION are different. - \htmlonly @@ -3661,27 +3536,6 @@ See HWLOC_DUMPED_HWDATA_DIR in \ref envvar for details about the location of dumped files. -\subsection faq_phi How do I build for Intel Xeon Phi coprocessor? - -\note This section does not apply to standalone Intel Xeon Phi processors (Knights Landing and Knights Mill). - -Intel Xeon Phi coprocessors (Knights Corner) usually runs a Linux environment -but cross-compiling from the host is required. 
-hwloc uses standard autotools options for cross-compiling. - -If building with icc: -\verbatim -./configure CC="icc -mmic" --host=x86_64-k1om-linux --build=x86_64-unknown-linux-gnu -\endverbatim - -If building with the Xeon Phi-specific GCC that comes with the MPSS environment, -for instance /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-gcc: -\verbatim -export PATH=$PATH:/usr/linux-k1om-4.7/bin/ -./configure --host=x86_64-k1om-linux --build=x86_64-unknown-linux-gnu -\endverbatim - - \subsection faq_bgq How do I build hwloc for BlueGene/Q? IBM BlueGene/Q machines run a standard Linux on the login/frontend nodes @@ -3745,13 +3599,160 @@ chuser "capabilities=CAP_PROPAGATE,CAP_NUMA_ATTACH" \endverbatim +\htmlonly +
    +\endhtmlonly +\section faq5 Compatibility between hwloc versions + +\subsection faq_version_api How do I handle API changes? + +The hwloc interface is extended with every new major release. +Any application using the hwloc API should be prepared to check at +compile-time whether some features are available in the currently +installed hwloc distribution. + +For instance, to check whether the hwloc version is at least 2.0, you should use: +\verbatim +#include <hwloc.h> +#if HWLOC_API_VERSION >= 0x00020000 +... +#endif +\endverbatim + +To check for the API of release X.Y.Z at build time, +you may compare ::HWLOC_API_VERSION with (X<<16)+(Y<<8)+Z. + +For supporting older releases that do not have HWLOC_OBJ_NUMANODE +and HWLOC_OBJ_PACKAGE yet, you may use: + +\verbatim +#include <hwloc.h> +#if HWLOC_API_VERSION < 0x00010b00 +#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE +#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET +#endif +\endverbatim + +Once a program is built against a hwloc library, it may also dynamically +link with compatible libraries from other hwloc releases. +The version of that runtime library may be queried with hwloc_get_api_version(). +See \ref faq_version_abi for using this function for testing ABI compatibility. + + + +\subsection faq_version What is the difference between API and library version numbers? + +::HWLOC_API_VERSION is the version of the API. +It changes when functions are added, modified, etc. +However it does not necessarily change from one release to another. +For instance, two releases of the same series (e.g. 2.0.3 and 2.0.4) +usually have the same ::HWLOC_API_VERSION (0x00020000). +However their HWLOC_VERSION strings are different +(\"2.0.3\" and \"2.0.4\" respectively). + + + +\subsection faq_version_abi How do I handle ABI breaks? + +The hwloc interface was deeply modified in release 2.0 +to fix several issues of the 1.x interface +(see \ref upgrade_to_api_2x and the NEWS file in the source directory for details).
+The ABI was broken, which means +applications must be recompiled against the new 2.0 interface. + +To check that you are not mixing old/recent headers with a recent/old runtime library, +check the major revision number in the API version: +\verbatim +#include <hwloc.h> + unsigned version = hwloc_get_api_version(); + if ((version >> 16) != (HWLOC_API_VERSION >> 16)) { + fprintf(stderr, + "%s compiled for hwloc API 0x%x but running on library API 0x%x.\n" + "You may need to point LD_LIBRARY_PATH to the right hwloc library.\n" + "Aborting since the new ABI is not backward compatible.\n", + callname, HWLOC_API_VERSION, version); + exit(EXIT_FAILURE); + } +\endverbatim +To specifically detect v2.0 issues: +\verbatim +#include <hwloc.h> +#if HWLOC_API_VERSION >= 0x00020000 + /* headers are recent */ + if (hwloc_get_api_version() < 0x20000) + ... error out, the hwloc runtime library is older than 2.0 ... +#else + /* headers are pre-2.0 */ + if (hwloc_get_api_version() >= 0x20000) + ... error out, the hwloc runtime library is more recent than 2.0 ... +#endif +\endverbatim + +In theory, library sonames prevent linking with incompatible libraries. +However custom hwloc installations or improperly configured build environments +may still lead to such issues. +Hence running one of the above (cheap) checks before initializing hwloc topology +may be useful. + + + +\subsection faq_version_xml Are XML topology files compatible between hwloc releases? + +XML topology files are forward-compatible: +an XML file may be loaded by a hwloc library that is more recent +than the hwloc release that exported that file. + +However, hwloc XMLs are not always backward-compatible: +Topologies exported by hwloc 2.x cannot be imported by 1.x by default +(see \ref upgrade_to_api_2x_xml for working around such issues). +There are also some corner cases where backward compatibility +is not guaranteed because of changes between major releases +(for instance 1.11 XMLs could not be imported in 1.10).
+ +XMLs are exchanged at runtime between some components of the HPC software stack +(for instance the resource managers and MPI processes). +Building all these components on the same (cluster-wide) +hwloc installation is a good way to avoid such incompatibilities. + + + +\subsection faq_version_synthetic Are synthetic strings compatible between hwloc releases? + +Synthetic strings (see \ref synthetic) are forward-compatible: +a synthetic string generated by a release may be imported by future hwloc libraries. + +However they are often not backward-compatible because new details may have been +added to synthetic descriptions in recent releases. +Some flags may be given to hwloc_topology_export_synthetic() to avoid such details +and stay backward compatible. + + + +\subsection faq_version_shmem Is it possible to share a shared-memory topology between different hwloc releases? + +Shared-memory topologies (see \ref hwlocality_shmem) have strong +requirements on compatibility between hwloc libraries. +Adopting a shared-memory topology fails +if it was exported by a non-compatible hwloc release. +Releases with the same major revision are usually compatible +(e.g. hwloc 2.0.4 may adopt a topology exported by 2.0.3) +but different major revisions may be incompatible +(e.g. hwloc 2.1.0 cannot adopt from 2.0.x). + +Topologies are shared at runtime between some components of the HPC software stack +(for instance the resource managers and MPI processes). +Building all these components on the same (system-wide) hwloc installation +is a good way to avoid such incompatibilities. + + + \page upgrade_to_api_2x Upgrading to the hwloc 2.0 API \htmlonly
    \endhtmlonly -See \ref faq_upgrade for detecting the hwloc version that you are compiling +See \ref faq5 for detecting the hwloc version that you are compiling and/or running against. diff --git a/hwloc/Makefile.am b/hwloc/Makefile.am index bd899a8a41..ddb0ded931 100644 --- a/hwloc/Makefile.am +++ b/hwloc/Makefile.am @@ -33,6 +33,7 @@ sources = \ distances.c \ components.c \ bind.c \ + distrib.c \ bitmap.c \ pci-common.c \ diff.c \ diff --git a/hwloc/components.c b/hwloc/components.c index 7c3a25eaa3..5c2879b64f 100644 --- a/hwloc/components.c +++ b/hwloc/components.c @@ -13,6 +13,7 @@ #define HWLOC_COMPONENT_STOP_NAME "stop" #define HWLOC_COMPONENT_EXCLUDE_CHAR '-' #define HWLOC_COMPONENT_SEPS "," +#define HWLOC_COMPONENT_PHASESEP_CHAR ':' /* list of all registered discovery components, sorted by priority, higher priority first. * noos is last because its priority is 0. @@ -232,17 +233,6 @@ hwloc_plugins_init(void) #endif /* HWLOC_HAVE_PLUGINS */ -static const char * -hwloc_disc_component_type_string(hwloc_disc_component_type_t type) -{ - switch (type) { - case HWLOC_DISC_COMPONENT_TYPE_CPU: return "cpu"; - case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: return "global"; - case HWLOC_DISC_COMPONENT_TYPE_MISC: return "misc"; - default: return "**unknown**"; - } -} - static int hwloc_disc_component_register(struct hwloc_disc_component *component, const char *filename) @@ -256,21 +246,26 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, return -1; } if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR) + || strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR) || strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) { if (hwloc_components_verbose) fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n", component->name, HWLOC_COMPONENT_EXCLUDE_CHAR); return -1; } - /* check that the component type is valid */ - switch ((unsigned) component->type) { - 
case HWLOC_DISC_COMPONENT_TYPE_CPU: - case HWLOC_DISC_COMPONENT_TYPE_GLOBAL: - case HWLOC_DISC_COMPONENT_TYPE_MISC: - break; - default: - fprintf(stderr, "Cannot register discovery component `%s' with unknown type %u\n", - component->name, (unsigned) component->type); + + /* check that the component phases are valid */ + if (!component->phases + || (component->phases != HWLOC_DISC_PHASE_GLOBAL + && component->phases & ~(HWLOC_DISC_PHASE_CPU + |HWLOC_DISC_PHASE_MEMORY + |HWLOC_DISC_PHASE_PCI + |HWLOC_DISC_PHASE_IO + |HWLOC_DISC_PHASE_MISC + |HWLOC_DISC_PHASE_ANNOTATE + |HWLOC_DISC_PHASE_TWEAK))) { + fprintf(stderr, "Cannot register discovery component `%s' with invalid phases 0x%x\n", + component->name, component->phases); return -1; } @@ -295,8 +290,8 @@ hwloc_disc_component_register(struct hwloc_disc_component *component, prev = &((*prev)->next); } if (hwloc_components_verbose) - fprintf(stderr, "Registered %s discovery component `%s' with priority %u (%s%s)\n", - hwloc_disc_component_type_string(component->type), component->name, component->priority, + fprintf(stderr, "Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n", + component->name, component->phases, component->priority, filename ? "from plugin " : "statically build", filename ? 
filename : ""); prev = &hwloc_disc_components; @@ -421,29 +416,117 @@ hwloc_topology_components_init(struct hwloc_topology *topology) topology->blacklisted_components = NULL; topology->backends = NULL; - topology->backend_excludes = 0; + topology->backend_phases = 0; + topology->backend_excluded_phases = 0; } +/* look for name among components, ignoring things after `:' */ static struct hwloc_disc_component * -hwloc_disc_component_find(const char *name /* name of NULL if any */) +hwloc_disc_component_find(const char *name, const char **endp) { - struct hwloc_disc_component *comp = hwloc_disc_components; + struct hwloc_disc_component *comp; + size_t length; + const char *end = strchr(name, HWLOC_COMPONENT_PHASESEP_CHAR); + if (end) { + length = end-name; + if (endp) + *endp = end+1; + } else { + length = strlen(name); + if (endp) + *endp = NULL; + } + + comp = hwloc_disc_components; while (NULL != comp) { - if (NULL == name || !strcmp(name, comp->name)) + if (!strncmp(name, comp->name, length)) return comp; comp = comp->next; } return NULL; } +static unsigned +hwloc_phases_from_string(const char *s) +{ + if (!s) + return ~0U; + if (s[0]<'0' || s[0]>'9') { + if (!strcasecmp(s, "global")) + return HWLOC_DISC_PHASE_GLOBAL; + else if (!strcasecmp(s, "cpu")) + return HWLOC_DISC_PHASE_CPU; + if (!strcasecmp(s, "memory")) + return HWLOC_DISC_PHASE_MEMORY; + if (!strcasecmp(s, "pci")) + return HWLOC_DISC_PHASE_PCI; + if (!strcasecmp(s, "io")) + return HWLOC_DISC_PHASE_IO; + if (!strcasecmp(s, "misc")) + return HWLOC_DISC_PHASE_MISC; + if (!strcasecmp(s, "annotate")) + return HWLOC_DISC_PHASE_ANNOTATE; + if (!strcasecmp(s, "tweak")) + return HWLOC_DISC_PHASE_TWEAK; + return 0; + } + return (unsigned) strtoul(s, NULL, 0); +} + +static int +hwloc_disc_component_blacklist_one(struct hwloc_topology *topology, + const char *name) +{ + struct hwloc_topology_forced_component_s *blacklisted; + struct hwloc_disc_component *comp; + unsigned phases; + unsigned i; + + if (!strcmp(name, 
"linuxpci") || !strcmp(name, "linuxio")) { + /* replace linuxpci and linuxio with linux (with IO phases) + * for backward compatibility with pre-v2.0 and v2.0 respectively */ + if (hwloc_components_verbose) + fprintf(stderr, "Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name); + comp = hwloc_disc_component_find("linux", NULL); + phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE; + + } else { + /* normal lookup */ + const char *end; + comp = hwloc_disc_component_find(name, &end); + phases = hwloc_phases_from_string(end); + } + if (!comp) { + errno = EINVAL; + return -1; + } + + if (hwloc_components_verbose) + fprintf(stderr, "Blacklisting component `%s` phases 0x%x\n", comp->name, phases); + + for(i=0; inr_blacklisted_components; i++) { + if (topology->blacklisted_components[i].component == comp) { + topology->blacklisted_components[i].phases |= phases; + return 0; + } + } + + blacklisted = realloc(topology->blacklisted_components, (topology->nr_blacklisted_components+1)*sizeof(*blacklisted)); + if (!blacklisted) + return -1; + + blacklisted[topology->nr_blacklisted_components].component = comp; + blacklisted[topology->nr_blacklisted_components].phases = phases; + topology->blacklisted_components = blacklisted; + topology->nr_blacklisted_components++; + return 0; +} + int hwloc_topology_set_components(struct hwloc_topology *topology, unsigned long flags, const char *name) { - struct hwloc_disc_component *comp; - struct hwloc_topology_forced_component_s *blacklisted; - if (topology->is_loaded) { errno = EBUSY; return -1; @@ -460,19 +543,12 @@ hwloc_topology_set_components(struct hwloc_topology *topology, return -1; } - comp = hwloc_disc_component_find(name); - if (!comp) { - errno = EINVAL; - return -1; + if (!strncmp(name, "all", 3) && name[3] == HWLOC_COMPONENT_PHASESEP_CHAR) { + topology->backend_excluded_phases = hwloc_phases_from_string(name+4); + return 0; } - blacklisted = 
realloc(topology->blacklisted_components, (topology->nr_blacklisted_components+1)*sizeof(*blacklisted)); - if (!blacklisted) - return -1; - - blacklisted[topology->nr_blacklisted_components++].component = comp; - topology->blacklisted_components = blacklisted; - return 0; + return hwloc_disc_component_blacklist_one(topology, name); } /* used by set_xml(), set_synthetic(), ... environment variables, ... to force the first backend */ @@ -490,18 +566,28 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology, return -1; } - comp = hwloc_disc_component_find(name); + comp = hwloc_disc_component_find(name, NULL); if (!comp) { errno = ENOSYS; return -1; } - backend = comp->instantiate(topology, comp, data1, data2, data3); + backend = comp->instantiate(topology, comp, 0U /* force-enabled don't get any phase blacklisting */, + data1, data2, data3); if (backend) { + int err; backend->envvar_forced = envvar_forced; if (topology->backends) hwloc_backends_disable_all(topology); - return hwloc_backend_enable(backend); + err = hwloc_backend_enable(backend); + + if (comp->phases == HWLOC_DISC_PHASE_GLOBAL) { + char *env = getenv("HWLOC_ANNOTATE_GLOBAL_COMPONENTS"); + if (env && atoi(env)) + topology->backend_excluded_phases &= ~HWLOC_DISC_PHASE_ANNOTATE; + } + + return err; } else return -1; } @@ -509,26 +595,30 @@ hwloc_disc_component_force_enable(struct hwloc_topology *topology, static int hwloc_disc_component_try_enable(struct hwloc_topology *topology, struct hwloc_disc_component *comp, - int envvar_forced) + int envvar_forced, + unsigned blacklisted_phases) { struct hwloc_backend *backend; - if (topology->backend_excludes & comp->type) { + if (!(comp->phases & ~(topology->backend_excluded_phases | blacklisted_phases))) { + /* all this backend phases are already excluded, exclude the backend entirely */ if (hwloc_components_verbose) /* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc. 
*/ - fprintf(stderr, "Excluding %s discovery component `%s', conflicts with excludes 0x%x\n", - hwloc_disc_component_type_string(comp->type), comp->name, topology->backend_excludes); + fprintf(stderr, "Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n", + comp->name, comp->phases, topology->backend_excluded_phases); return -1; } - backend = comp->instantiate(topology, comp, NULL, NULL, NULL); + backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases, + NULL, NULL, NULL); if (!backend) { if (hwloc_components_verbose || envvar_forced) fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name); return -1; } + backend->phases &= ~blacklisted_phases; backend->envvar_forced = envvar_forced; return hwloc_backend_enable(backend); } @@ -546,7 +636,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) _env = getenv("HWLOC_COMPONENTS"); env = _env ? strdup(_env) : NULL; - /* enable explicitly listed components */ + /* blacklist disabled components */ if (env) { char *curenv = env; size_t s; @@ -556,21 +646,41 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) if (s) { char c; - /* replace linuxpci with linuxio for backward compatibility with pre-v2.0 */ - if (!strncmp(curenv, "linuxpci", 8) && s == 8) { - curenv[5] = 'i'; - curenv[6] = 'o'; - curenv[7] = *HWLOC_COMPONENT_SEPS; - } else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "linuxpci", 8) && s == 9) { - curenv[6] = 'i'; - curenv[7] = 'o'; - curenv[8] = *HWLOC_COMPONENT_SEPS; - /* skip this name, it's a negated one */ + if (curenv[0] != HWLOC_COMPONENT_EXCLUDE_CHAR) goto nextname; - } - if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR) - goto nextname; + /* save the last char and replace with \0 */ + c = curenv[s]; + curenv[s] = '\0'; + + /* blacklist it, and just ignore failures to allocate */ + hwloc_disc_component_blacklist_one(topology, curenv+1); + + /* remove that 
blacklisted name from the string */ + for(i=0; inr_blacklisted_components; i++) + if (comp == topology->blacklisted_components[i].component) { + blacklisted_phases = topology->blacklisted_components[i].phases; + break; + } + if (comp->phases & ~blacklisted_phases) + hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases); } else { - fprintf(stderr, "Cannot find discovery component `%s'\n", curenv); + fprintf(stderr, "Cannot find discovery component `%s'\n", name); } /* restore chars (the second loop below needs env to be unmodified) */ curenv[s] = c; } -nextname: curenv += s; if (*curenv) /* Skip comma */ @@ -606,34 +729,24 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) if (tryall) { comp = hwloc_disc_components; while (NULL != comp) { + unsigned blacklisted_phases = 0U; if (!comp->enabled_by_default) goto nextcomp; /* check if this component was blacklisted by the application */ for(i=0; inr_blacklisted_components; i++) if (comp == topology->blacklisted_components[i].component) { - if (hwloc_components_verbose) - fprintf(stderr, "Excluding %s discovery component `%s' on application request\n", - hwloc_disc_component_type_string(comp->type), comp->name); - goto nextcomp; - } - /* check if this component was explicitly excluded in env */ - if (env) { - char *curenv = env; - while (*curenv) { - size_t s = strcspn(curenv, HWLOC_COMPONENT_SEPS); - if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1) && strlen(comp->name) == s-1) { - if (hwloc_components_verbose) - fprintf(stderr, "Excluding %s discovery component `%s' because of HWLOC_COMPONENTS environment variable\n", - hwloc_disc_component_type_string(comp->type), comp->name); - goto nextcomp; - } - curenv += s; - if (*curenv) - /* Skip comma */ - curenv++; + blacklisted_phases = topology->blacklisted_components[i].phases; + break; } + + if (!(comp->phases & ~blacklisted_phases)) { + if (hwloc_components_verbose) + 
fprintf(stderr, "Excluding blacklisted discovery component `%s' phases 0x%x\n", + comp->name, comp->phases); + goto nextcomp; } - hwloc_disc_component_try_enable(topology, comp, 0 /* defaults, not envvar forced */); + + hwloc_disc_component_try_enable(topology, comp, 0 /* defaults, not envvar forced */, blacklisted_phases); nextcomp: comp = comp->next; } @@ -645,7 +758,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology) backend = topology->backends; fprintf(stderr, "Final list of enabled discovery components: "); while (backend != NULL) { - fprintf(stderr, "%s%s", first ? "" : ",", backend->component->name); + fprintf(stderr, "%s%s(0x%x)", first ? "" : ",", backend->component->name, backend->phases); backend = backend->next; first = 0; } @@ -696,6 +809,11 @@ hwloc_backend_alloc(struct hwloc_topology *topology, } backend->component = component; backend->topology = topology; + /* filter-out component phases that are excluded */ + backend->phases = component->phases & ~topology->backend_excluded_phases; + if (backend->phases != component->phases && hwloc_components_verbose) + fprintf(stderr, "Trying discovery component `%s' with phases 0x%x instead of 0x%x\n", + component->name, backend->phases, component->phases); backend->flags = 0; backend->discover = NULL; backend->get_pci_busid_cpuset = NULL; @@ -722,8 +840,8 @@ hwloc_backend_enable(struct hwloc_backend *backend) /* check backend flags */ if (backend->flags) { - fprintf(stderr, "Cannot enable %s discovery component `%s' with unknown flags %lx\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name, backend->flags); + fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n", + backend->component->name, backend->component->phases, backend->flags); return -1; } @@ -732,8 +850,8 @@ hwloc_backend_enable(struct hwloc_backend *backend) while (NULL != *pprev) { if ((*pprev)->component == backend->component) { if 
(hwloc_components_verbose) - fprintf(stderr, "Cannot enable %s discovery component `%s' twice\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x twice\n", + backend->component->name, backend->component->phases); hwloc_backend_disable(backend); errno = EBUSY; return -1; @@ -742,8 +860,8 @@ hwloc_backend_enable(struct hwloc_backend *backend) } if (hwloc_components_verbose) - fprintf(stderr, "Enabling %s discovery component `%s'\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, "Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n", + backend->component->name, backend->phases, backend->component->phases); /* enqueue at the end */ pprev = &topology->backends; @@ -752,7 +870,8 @@ hwloc_backend_enable(struct hwloc_backend *backend) backend->next = *pprev; *pprev = backend; - topology->backend_excludes |= backend->component->excludes; + topology->backend_phases |= backend->component->phases; + topology->backend_excluded_phases |= backend->component->excluded_phases; return 0; } @@ -825,13 +944,13 @@ hwloc_backends_disable_all(struct hwloc_topology *topology) while (NULL != (backend = topology->backends)) { struct hwloc_backend *next = backend->next; if (hwloc_components_verbose) - fprintf(stderr, "Disabling %s discovery component `%s'\n", - hwloc_disc_component_type_string(backend->component->type), backend->component->name); + fprintf(stderr, "Disabling discovery component `%s'\n", + backend->component->name); hwloc_backend_disable(backend); topology->backends = next; } topology->backends = NULL; - topology->backend_excludes = 0; + topology->backend_excluded_phases = 0; } void diff --git a/hwloc/diff.c b/hwloc/diff.c index 1450242b24..7794358bb9 100644 --- a/hwloc/diff.c +++ b/hwloc/diff.c @@ -1,5 +1,5 @@ /* - * Copyright © 2013-2018 Inria. All rights reserved. 
+ * Copyright © 2013-2019 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -351,7 +351,8 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1, err = 1; break; } - if (dist1->type != dist2->type + if (dist1->unique_type != dist2->unique_type + || dist1->different_types || dist2->different_types /* too lazy to support this case */ || dist1->nbobjs != dist2->nbobjs || dist1->kind != dist2->kind || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) { diff --git a/hwloc/distances.c b/hwloc/distances.c index c5c5bdf6b4..9e56a96962 100644 --- a/hwloc/distances.c +++ b/hwloc/distances.c @@ -14,6 +14,9 @@ #include #include +static struct hwloc_internal_distances_s * +hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances); + /****************************************************** * Global init, prepare, destroy, dup */ @@ -70,6 +73,8 @@ void hwloc_internal_distances_prepare(struct hwloc_topology *topology) static void hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist) { + free(dist->name); + free(dist->different_types); free(dist->indexes); free(dist->objs); free(dist->values); @@ -96,15 +101,35 @@ static int hwloc_internal_distances_dup_one(struct hwloc_topology *new, struct h newdist = hwloc_tma_malloc(tma, sizeof(*newdist)); if (!newdist) return -1; + if (olddist->name) { + newdist->name = hwloc_tma_strdup(tma, olddist->name); + if (!newdist->name) { + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + hwloc_internal_distances_free(newdist); + return -1; + } + } else { + newdist->name = NULL; + } - newdist->type = olddist->type; + if (olddist->different_types) { + newdist->different_types = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->different_types)); + if (!newdist->different_types) { + assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ + hwloc_internal_distances_free(newdist); + return -1; 
+ } + memcpy(newdist->different_types, olddist->different_types, nbobjs * sizeof(*newdist->different_types)); + } else + newdist->different_types = NULL; + newdist->unique_type = olddist->unique_type; newdist->nbobjs = nbobjs; newdist->kind = olddist->kind; newdist->id = olddist->id; newdist->indexes = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->indexes)); newdist->objs = hwloc_tma_calloc(tma, nbobjs * sizeof(*newdist->objs)); - newdist->objs_are_valid = 0; + newdist->iflags = olddist->iflags & ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; /* must be revalidated after dup() */ newdist->values = hwloc_tma_malloc(tma, nbobjs*nbobjs * sizeof(*newdist->values)); if (!newdist->indexes || !newdist->objs || !newdist->values) { assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ @@ -182,7 +207,7 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth) next = topology->first_dist; while ((dist = next) != NULL) { next = dist->next; - if (dist->type == type) { + if (dist->unique_type == type) { if (next) next->prev = dist->prev; else @@ -198,6 +223,27 @@ int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth) return 0; } +int hwloc_distances_release_remove(hwloc_topology_t topology, + struct hwloc_distances_s *distances) +{ + struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances); + if (!dist) { + errno = EINVAL; + return -1; + } + if (dist->prev) + dist->prev->next = dist->next; + else + topology->first_dist = dist->next; + if (dist->next) + dist->next->prev = dist->prev; + else + topology->last_dist = dist->prev; + hwloc_internal_distances_free(dist); + hwloc_distances_release(topology, distances); + return 0; +} + /****************************************************** * Add distances to the topology */ @@ -209,17 +255,34 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, str * the caller gives us the distances and objs pointers, we'll free them 
later. */ static int -hwloc_internal_distances__add(hwloc_topology_t topology, - hwloc_obj_type_t type, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values, - unsigned long kind) +hwloc_internal_distances__add(hwloc_topology_t topology, const char *name, + hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, + unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values, + unsigned long kind, unsigned iflags) { - struct hwloc_internal_distances_s *dist = calloc(1, sizeof(*dist)); + struct hwloc_internal_distances_s *dist; + + if (different_types) { + kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; /* the user isn't forced to give it */ + } else if (kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) { + errno = EINVAL; + goto err; + } + + dist = calloc(1, sizeof(*dist)); if (!dist) goto err; - dist->type = type; + if (name) + dist->name = strdup(name); /* ignore failure */ + + dist->unique_type = unique_type; + dist->different_types = different_types; dist->nbobjs = nbobjs; dist->kind = kind; + dist->iflags = iflags; + + assert(!!(iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) == !!objs); if (!objs) { assert(indexes); @@ -228,18 +291,16 @@ hwloc_internal_distances__add(hwloc_topology_t topology, dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t)); if (!dist->objs) goto err_with_dist; - dist->objs_are_valid = 0; } else { unsigned i; assert(!indexes); /* we only have objs, generate the indexes arrays so that we can refresh objs later */ dist->objs = objs; - dist->objs_are_valid = 1; dist->indexes = malloc(nbobjs * sizeof(*dist->indexes)); if (!dist->indexes) goto err_with_dist; - if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->type)) { + if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) { for(i=0; iindexes[i] = objs[i]->os_index; } else { @@ -264,16 +325,19 @@ hwloc_internal_distances__add(hwloc_topology_t topology, err_with_dist: free(dist); err: + free(different_types); free(objs); free(indexes); free(values); return -1; } -int 
hwloc_internal_distances_add_by_index(hwloc_topology_t topology, - hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, +int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, + hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags) { + unsigned iflags = 0; /* objs not valid */ + if (nbobjs < 2) { errno = EINVAL; goto err; @@ -287,11 +351,12 @@ int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, goto err; } - return hwloc_internal_distances__add(topology, type, nbobjs, NULL, indexes, values, kind); + return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, NULL, indexes, values, kind, iflags); err: free(indexes); free(values); + free(different_types); return -1; } @@ -301,11 +366,13 @@ hwloc_internal_distances_restrict(hwloc_obj_t *objs, uint64_t *values, unsigned nbobjs, unsigned disappeared); -int hwloc_internal_distances_add(hwloc_topology_t topology, +int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags) { + hwloc_obj_type_t unique_type, *different_types; unsigned i, disappeared = 0; + unsigned iflags = HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; if (nbobjs < 2) { errno = EINVAL; @@ -329,7 +396,26 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, nbobjs -= disappeared; } - if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP)) { + unique_type = objs[0]->type; + for(i=1; itype != unique_type) { + unique_type = HWLOC_OBJ_TYPE_NONE; + break; + } + if (unique_type == HWLOC_OBJ_TYPE_NONE) { + /* heterogeneous types */ + different_types = malloc(nbobjs * sizeof(*different_types)); + if (!different_types) + goto err; + for(i=0; itype; + + } else { + /* homogeneous types */ + different_types = NULL; + } + + if 
(topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !different_types) { float full_accuracy = 0.f; float *accuracies; unsigned nbaccuracies; @@ -344,7 +430,7 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, if (topology->grouping_verbose) { unsigned j; - int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(objs[0]->type); + int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type); fprintf(stderr, "Trying to group objects using distance matrix:\n"); fprintf(stderr, "%s", gp ? "gp_index" : "os_index"); for(j=0; jtype, nbobjs, objs, NULL, values, kind); + return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, objs, NULL, values, kind, iflags); err: free(objs); @@ -381,7 +467,6 @@ int hwloc_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, unsigned long kind, unsigned long flags) { - hwloc_obj_type_t type; unsigned i; uint64_t *_values; hwloc_obj_t *_objs; @@ -405,15 +490,8 @@ int hwloc_distances_add(hwloc_topology_t topology, /* no strict need to check for duplicates, things shouldn't break */ - type = objs[0]->type; - if (type == HWLOC_OBJ_GROUP) { - /* not supported yet, would require we save the subkind together with the type. 
*/ - errno = EINVAL; - return -1; - } - for(i=1; itype != type) { + if (!objs[i]) { errno = EINVAL; return -1; } @@ -426,7 +504,7 @@ int hwloc_distances_add(hwloc_topology_t topology, memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t)); memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values)); - err = hwloc_internal_distances_add(topology, nbobjs, _objs, _values, kind, flags); + err = hwloc_internal_distances_add(topology, NULL, nbobjs, _objs, _values, kind, flags); if (err < 0) goto out; /* _objs and _values freed in hwloc_internal_distances_add() */ @@ -446,9 +524,9 @@ int hwloc_distances_add(hwloc_topology_t topology, * Refresh objects in distances */ -static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +static hwloc_obj_t hwloc_find_obj_by_depth_and_gp_index(hwloc_topology_t topology, unsigned depth, uint64_t gp_index) { - hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, 0); + hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0); while (obj) { if (obj->gp_index == gp_index) return obj; @@ -457,6 +535,25 @@ static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology return NULL; } +static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index) +{ + int depth = hwloc_get_type_depth(topology, type); + if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) + return NULL; + if (depth == HWLOC_TYPE_DEPTH_MULTIPLE) { + int topodepth = hwloc_topology_get_depth(topology); + for(depth=0; depthtype; + hwloc_obj_type_t unique_type = dist->unique_type; + hwloc_obj_type_t *different_types = dist->different_types; unsigned nbobjs = dist->nbobjs; hwloc_obj_t *objs = dist->objs; uint64_t *indexes = dist->indexes; unsigned disappeared = 0; unsigned i; - if (dist->objs_are_valid) + if (dist->iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) return 0; for(i=0; inbobjs -= disappeared; } - dist->objs_are_valid = 1; + dist->iflags |= 
HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; return 0; } @@ -562,32 +660,64 @@ hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology) { struct hwloc_internal_distances_s *dist; for(dist = topology->first_dist; dist; dist = dist->next) - dist->objs_are_valid = 0; + dist->iflags &= ~HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID; } /****************************************************** * User API for getting distances */ +/* what we actually allocate for user queries, even if we only + * return the distances part of it. + */ +struct hwloc_distances_container_s { + unsigned id; + struct hwloc_distances_s distances; +}; + +#define HWLOC_DISTANCES_CONTAINER_OFFSET ((char*)&((struct hwloc_distances_container_s*)NULL)->distances - (char*)NULL) +#define HWLOC_DISTANCES_CONTAINER(_d) (struct hwloc_distances_container_s *) ( ((char*)_d) - HWLOC_DISTANCES_CONTAINER_OFFSET ) + +static struct hwloc_internal_distances_s * +hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances) +{ + struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances); + struct hwloc_internal_distances_s *dist; + for(dist = topology->first_dist; dist; dist = dist->next) + if (dist->id == cont->id) + return dist; + return NULL; +} + void hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused, struct hwloc_distances_s *distances) { + struct hwloc_distances_container_s *cont = HWLOC_DISTANCES_CONTAINER(distances); free(distances->values); free(distances->objs); - free(distances); + free(cont); +} + +const char * +hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances) +{ + struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances); + return dist ? 
dist->name : NULL; } static struct hwloc_distances_s * hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused, struct hwloc_internal_distances_s *dist) { + struct hwloc_distances_container_s *cont; struct hwloc_distances_s *distances; unsigned nbobjs; - distances = malloc(sizeof(*distances)); - if (!distances) + cont = malloc(sizeof(*cont)); + if (!cont) return NULL; + distances = &cont->distances; nbobjs = distances->nbobjs = dist->nbobjs; @@ -602,18 +732,20 @@ hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused, memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values)); distances->kind = dist->kind; + + cont->id = dist->id; return distances; out_with_objs: free(distances->objs); out: - free(distances); + free(cont); return NULL; } static int hwloc__distances_get(hwloc_topology_t topology, - hwloc_obj_type_t type, + const char *name, hwloc_obj_type_t type, unsigned *nrp, struct hwloc_distances_s **distancesp, unsigned long kind, unsigned long flags __hwloc_attribute_unused) { @@ -644,7 +776,10 @@ hwloc__distances_get(hwloc_topology_t topology, unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL; unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL; - if (type != HWLOC_OBJ_TYPE_NONE && type != dist->type) + if (name && (!dist->name || strcmp(name, dist->name))) + continue; + + if (type != HWLOC_OBJ_TYPE_NONE && type != dist->unique_type) continue; if (kind_from && !(kind_from & dist->kind)) @@ -682,7 +817,7 @@ hwloc_distances_get(hwloc_topology_t topology, return -1; } - return hwloc__distances_get(topology, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags); + return hwloc__distances_get(topology, NULL, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags); } int @@ -697,14 +832,40 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, return -1; } - /* switch back to types since we don't support groups for now */ + /* FIXME: passing the depth of a group level may 
return group distances at a different depth */ type = hwloc_get_depth_type(topology, depth); if (type == (hwloc_obj_type_t)-1) { errno = EINVAL; return -1; } - return hwloc__distances_get(topology, type, nrp, distancesp, kind, flags); + return hwloc__distances_get(topology, NULL, type, nrp, distancesp, kind, flags); +} + +int +hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long flags) +{ + if (flags || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + + return hwloc__distances_get(topology, name, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, HWLOC_DISTANCES_KIND_ALL, flags); +} + +int +hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, + unsigned *nrp, struct hwloc_distances_s **distancesp, + unsigned long kind, unsigned long flags) +{ + if (flags || !topology->is_loaded) { + errno = EINVAL; + return -1; + } + + return hwloc__distances_get(topology, NULL, type, nrp, distancesp, kind, flags); } /****************************************************** diff --git a/hwloc/distrib.c b/hwloc/distrib.c new file mode 100644 index 0000000000..311999bb4b --- /dev/null +++ b/hwloc/distrib.c @@ -0,0 +1,347 @@ +/*************************************************************************** + * Copyright 2019 UChicago Argonne, LLC. + * Author: Nicolas Denoyelle + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING in top-level directory. +****************************************************************************/ + +#include +#include +#include "private/autogen/config.h" +#include "hwloc.h" + +struct hwloc_distrib_level{ + hwloc_obj_type_t type; // level type. + unsigned depth; // level depth. + unsigned user_index; // Index of this level as provided by user order. + unsigned arity; // Number of children of this level below parent. + unsigned coord; // The current level object index [0..arity[. + // Iteration order of this level objects. 
index[coord] give logical_index below parent. + unsigned *index; +}; + +struct hwloc_distrib_iterator{ + hwloc_obj_t *roots; + unsigned n_roots; + unsigned root_coord; + struct hwloc_distrib_level ** levels; // n_roots * n_levels + unsigned n_levels; +}; + +static unsigned* range(const unsigned n){ + unsigned i,*r = malloc(n*sizeof(*r)); + + if(r==NULL) + return NULL; + for(i=0; i0;i--){ + val = rand()%(i); + ret[i-1] = index[val]; + index[val] = index[i-1]; + } + free(index); + return ret; +} + +static int hwloc_distrib_level_cmp_depth(const void *la, const void* lb){ + const struct hwloc_distrib_level *a = (struct hwloc_distrib_level *)la; + const struct hwloc_distrib_level *b = (struct hwloc_distrib_level *)lb; + if(a->depth > b->depth) + return 1; + if(a->depth < b->depth) + return -1; + return 0; +} + +static int hwloc_distrib_level_cmp_user_index(const void *la, const void* lb){ + const struct hwloc_distrib_level *a = (struct hwloc_distrib_level *)la; + const struct hwloc_distrib_level *b = (struct hwloc_distrib_level *)lb; + if(a->user_index > b->user_index) + return 1; + if(a->user_index < b->user_index) + return -1; + return 0; +} + +static struct hwloc_distrib_level * +hwloc_distrib_root_levels(hwloc_topology_t topology, + const hwloc_obj_t root, + const hwloc_obj_type_t *types, + const unsigned n_types, + const unsigned long flags) +{ + unsigned i; + unsigned arity; + hwloc_obj_t parent; + struct hwloc_distrib_level *levels; + + levels = malloc(n_types * sizeof(*levels)); + if(levels == NULL) + return NULL; + + for (i=0; icpuset, levels[i].depth); + levels[i].arity = arity > levels[i].arity ? 
arity : levels[i].arity; + parent = hwloc_get_next_obj_inside_cpuset_by_depth(topology, root->cpuset, parent->depth, parent); + } + + if (levels[i].arity == 0) { + fprintf(stderr, "No object of type %s below level %s.\n", hwloc_obj_type_string(levels[i].type), hwloc_obj_type_string(levels[i-1].type)); + goto failure; + } + + parent = hwloc_get_obj_inside_cpuset_by_depth(topology, root->cpuset, levels[i].depth, 0); + } + + // Allocate levels index. + for (i=0; iindex); + free(levels); +} + + +struct hwloc_distrib_iterator * +hwloc_distrib_build_iterator(hwloc_topology_t topology, + hwloc_obj_t *roots, + const unsigned n_roots, + const hwloc_obj_type_t *levels, + const unsigned n_levels, + const unsigned long flags){ + unsigned i; + struct hwloc_distrib_iterator *it = malloc(sizeof(*it) + sizeof(*it->levels) * n_roots); + if(it == NULL) + return NULL; + + it->roots = roots; + it->n_roots = n_roots; + it->root_coord = 0; + it->n_levels = n_levels; + it->levels = (struct hwloc_distrib_level **)((char*)it + sizeof(*it)); + + for(i=0; ilevels[i] = hwloc_distrib_root_levels(topology, roots[i], levels, n_levels, flags); + if(it->levels[i] == NULL){ + while(i--) + hwloc_distrib_destroy_level(it->levels[i]); + goto failure; + } + } + + return it; + + failure: + free(it); + return NULL; +} + +HWLOC_DECLSPEC struct hwloc_distrib_iterator * +hwloc_distrib_iterator_round_robin(hwloc_topology_t topology, + const hwloc_obj_type_t type, + const unsigned long flags){ + hwloc_obj_t root = hwloc_get_obj_by_depth(topology, 0, 0); + struct hwloc_distrib_iterator *it; + + it = malloc(sizeof(*it) + sizeof(hwloc_obj_t) + sizeof(struct hwloc_distrib_level*)); + if(it == NULL) + return NULL; + + it->roots = (hwloc_obj_t*) ((char*)it + sizeof(*it)); + *it->roots = root; + it->n_roots = 1; + it->root_coord = 0; + it->n_levels = 1; + it->levels = (struct hwloc_distrib_level **)((char*)it + sizeof(*it) + sizeof(hwloc_obj_t)); + *it->levels = hwloc_distrib_root_levels(topology, root, &type, 1, 
flags); + + if (*it->levels == NULL){ free(it); return NULL; } + return it; +} + +HWLOC_DECLSPEC struct hwloc_distrib_iterator * +hwloc_distrib_iterator_scatter(hwloc_topology_t topology, + const hwloc_obj_type_t type, + const unsigned long flags){ + + unsigned i=0, n=0; + hwloc_obj_t obj, root = hwloc_get_obj_by_depth(topology, 0, 0); + hwloc_obj_type_t *levels; + struct hwloc_distrib_iterator *it; + + // Count depths with a non empty cpuset. + obj = root; + while(obj){ + if ((obj->cpuset != NULL && !hwloc_bitmap_iszero(obj->cpuset)) && hwloc_get_type_depth(topology, obj->type) >= 0) + n++; + if (obj->type == type) + break; + obj = obj->first_child; + } + + // fill levels array. + levels = malloc(sizeof(*levels) * n); + obj = root; + while(obj){ + if( obj->cpuset != NULL && !hwloc_bitmap_iszero(obj->cpuset) && hwloc_get_type_depth(topology, obj->type) >= 0){ + levels[n-1-i] = obj->type; + i++; + } + if (obj->type == type) + break; + obj = obj->first_child; + } + + it = malloc(sizeof(*it) + sizeof(hwloc_obj_t) + sizeof(struct hwloc_distrib_level*)); + + if(it == NULL) + goto failure; + + it->roots = (hwloc_obj_t*) ((char*)it + sizeof(*it)); + *it->roots = root; + it->n_roots = 1; + it->root_coord = 0; + it->n_levels = n; + it->levels = (struct hwloc_distrib_level **)((char*)it + sizeof(*it) + sizeof(hwloc_obj_t)); + + *it->levels = hwloc_distrib_root_levels(topology, root, levels, n, flags); + + if (*it->levels == NULL) + goto failure_with_it; + + free(levels); + return it; + + failure_with_it: + free(it); + failure: + free(levels); + return NULL; +} + +void hwloc_distrib_destroy_iterator(struct hwloc_distrib_iterator *it){ + unsigned i; + + for(i=0; in_roots; i++) + hwloc_distrib_destroy_level(it->levels[i]); + free(it); +} + +// Increment coordinates by one. Return 1 if iterator reached end and reset it. +// Else return 0. 
+static int +hwloc_distrib_iterator_inc(struct hwloc_distrib_iterator *it){ + int i; + struct hwloc_distrib_level *levels; + + do_root: + // Sort by user_index to increment coordinates. + levels = it->levels[it->root_coord]; + qsort(levels, it->n_levels, sizeof(*levels), hwloc_distrib_level_cmp_user_index); + + for (i=it->n_levels-1; i>=0; i--){ + if(++levels[i].coord >= levels[i].arity) + levels[i].coord = 0; + else + break; + } + if(i < 0 && levels[0].coord == 0){ + if (++it->root_coord == it->n_roots){ + it->root_coord = 0; + return 0; + } else { + goto do_root; + } + } + return 1; +} + +int +hwloc_distrib_iterator_next(hwloc_topology_t topology, + struct hwloc_distrib_iterator *it, + hwloc_obj_t *next){ + unsigned i; + struct hwloc_distrib_level *levels = it->levels[it->root_coord]; + hwloc_obj_t obj = it->roots[it->root_coord]; + unsigned coord; + + // Sort by depth to walk objects at set coordinates. + qsort(levels, it->n_levels, sizeof(*levels), hwloc_distrib_level_cmp_depth); + + for(i=0; in_levels; i++){ + coord = levels[i].index[levels[i].coord]; + obj = hwloc_get_obj_inside_cpuset_by_depth(topology, obj->cpuset, levels[i].depth, coord); + if( obj == NULL) + return hwloc_distrib_iterator_inc(it) && hwloc_distrib_iterator_next(topology, it, next); + + } + + *next = obj; + return hwloc_distrib_iterator_inc(it); +} diff --git a/hwloc/hwloc2.dtd b/hwloc/hwloc2.dtd index c206a9fba8..35dd329f56 100644 --- a/hwloc/hwloc2.dtd +++ b/hwloc/hwloc2.dtd @@ -7,11 +7,11 @@ This is the DTD for hwloc v2.x XMLs. 
--> - + - + @@ -30,6 +30,7 @@ + @@ -50,6 +51,11 @@ + + + + + diff --git a/hwloc/pci-common.c b/hwloc/pci-common.c index 59baf71437..deca5cce5e 100644 --- a/hwloc/pci-common.c +++ b/hwloc/pci-common.c @@ -140,7 +140,7 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology) if (!err) { if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */ buffer = malloc(st.st_size+1); - if (read(fd, buffer, st.st_size) == st.st_size) { + if (buffer && read(fd, buffer, st.st_size) == st.st_size) { buffer[st.st_size] = '\0'; hwloc_pci_forced_locality_parse(topology, buffer); } @@ -818,13 +818,14 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config, * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane + * PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane */ /* lanespeed in Gbit/s */ if (speed <= 2) lanespeed = 2.5f * speed * 0.8f; else - lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */ + lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */ /* linkspeed in GB/s */ *linkspeed = lanespeed * width / 8; diff --git a/hwloc/shmem.c b/hwloc/shmem.c index ed2f4fdba8..94d55eef7b 100644 --- a/hwloc/shmem.c +++ b/hwloc/shmem.c @@ -1,5 +1,5 @@ /* - * Copyright © 2017-2018 Inria. All rights reserved. + * Copyright © 2017-2019 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -214,6 +214,8 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, new->support.discovery = malloc(sizeof(*new->support.discovery)); new->support.cpubind = malloc(sizeof(*new->support.cpubind)); new->support.membind = malloc(sizeof(*new->support.membind)); + if (!new->support.discovery || !new->support.cpubind || !new->support.membind) + goto out_with_support; memcpy(new->support.discovery, old->support.discovery, sizeof(*new->support.discovery)); memcpy(new->support.cpubind, old->support.cpubind, sizeof(*new->support.cpubind)); memcpy(new->support.membind, old->support.membind, sizeof(*new->support.membind)); @@ -230,6 +232,11 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, *topologyp = new; return 0; + out_with_support: + free(new->support.discovery); + free(new->support.cpubind); + free(new->support.membind); + free(new); out_with_components: hwloc_components_fini(); out_with_mmap: diff --git a/hwloc/topology-aix.c b/hwloc/topology-aix.c index f2b19a5095..73e6bea655 100644 --- a/hwloc/topology-aix.c +++ b/hwloc/topology-aix.c @@ -736,7 +736,7 @@ look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int l } static int -hwloc_look_aix(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_aix(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -747,6 +747,8 @@ hwloc_look_aix(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus struct hwloc_topology *topology = backend->topology; int i; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -869,6 +871,7 @@ hwloc_set_aix_hooks(struct hwloc_binding_hooks *hooks, static struct hwloc_backend * hwloc_aix_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -882,9 +885,9 @@ hwloc_aix_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_aix_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "aix", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_aix_component_instantiate, 50, 1, diff --git a/hwloc/topology-bgq.c b/hwloc/topology-bgq.c index 51d8763de5..8924d1d51d 100644 --- a/hwloc/topology-bgq.c +++ b/hwloc/topology-bgq.c @@ -57,6 +57,8 @@ hwloc_look_bgq(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) hwloc_obj_t obj; unsigned i; + assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -261,6 +263,7 @@ hwloc_set_bgq_hooks(struct hwloc_binding_hooks *hooks, static struct hwloc_backend * hwloc_bgq_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -297,8 +300,8 @@ hwloc_bgq_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_bgq_disc_component = { - 
HWLOC_DISC_COMPONENT_TYPE_GLOBAL, "bgq", + HWLOC_DISC_PHASE_GLOBAL, ~0, hwloc_bgq_component_instantiate, 50, diff --git a/hwloc/topology-cuda.c b/hwloc/topology-cuda.c index 17b2ff1634..782e5a5a2e 100644 --- a/hwloc/topology-cuda.c +++ b/hwloc/topology-cuda.c @@ -53,7 +53,7 @@ static unsigned hwloc_cuda_cores_per_MP(int major, int minor) } static int -hwloc_cuda_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_cuda_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. @@ -66,6 +66,8 @@ hwloc_cuda_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dst cudaError_t cures; int nb, i; + assert(dstatus->phase == HWLOC_DISC_PHASE_IO); + hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter); if (filter == HWLOC_TYPE_FILTER_KEEP_NONE) return 0; @@ -130,6 +132,7 @@ hwloc_cuda_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dst static struct hwloc_backend * hwloc_cuda_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -145,9 +148,9 @@ hwloc_cuda_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_cuda_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_MISC, "cuda", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_IO, + HWLOC_DISC_PHASE_GLOBAL, hwloc_cuda_component_instantiate, 10, /* after pci */ 1, diff --git a/hwloc/topology-darwin.c b/hwloc/topology-darwin.c index b0fb83eb25..3665d7484a 100644 --- a/hwloc/topology-darwin.c +++ b/hwloc/topology-darwin.c @@ -23,7 +23,7 @@ #include "private/debug.h" static int -hwloc_look_darwin(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) 
+hwloc_look_darwin(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. @@ -51,6 +51,8 @@ hwloc_look_darwin(struct hwloc_backend *backend, struct hwloc_disc_status *dstat int gotnuma = 0; int gotnumamemory = 0; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -367,6 +369,7 @@ hwloc_set_darwin_hooks(struct hwloc_binding_hooks *hooks __hwloc_attribute_unuse static struct hwloc_backend * hwloc_darwin_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -380,9 +383,9 @@ hwloc_darwin_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_darwin_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "darwin", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_darwin_component_instantiate, 50, 1, diff --git a/hwloc/topology-fake.c b/hwloc/topology-fake.c index 52cd372f0d..ba50cbfcfe 100644 --- a/hwloc/topology-fake.c +++ b/hwloc/topology-fake.c @@ -9,21 +9,58 @@ #include +static int +hwloc_look_fake(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) +{ + hwloc_topology_t topology = backend->topology; + + assert(dstatus->phase == HWLOC_DISC_PHASE_TWEAK); + + if (getenv("HWLOC_DEBUG_FAKE_COMPONENT_TWEAK")) { + hwloc_obj_t obj; + int err; + /* restrict to single (last) PU */ + obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU)-1); + assert(obj); + err = hwloc_topology_restrict(topology, obj->cpuset, 0); + assert(!err); + /* restrict to single (first) NUMA node */ + obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); + assert(obj); + err = 
hwloc_topology_restrict(topology, obj->nodeset, HWLOC_RESTRICT_FLAG_BYNODESET); + assert(!err); + } + + return 0; +} + static struct hwloc_backend * hwloc_fake_component_instantiate(struct hwloc_topology *topology __hwloc_attribute_unused, struct hwloc_disc_component *component __hwloc_attribute_unused, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) { + struct hwloc_backend *backend; + + backend = hwloc_backend_alloc(topology, component); + if (!backend) + goto out; + backend->discover = hwloc_look_fake; + if (getenv("HWLOC_DEBUG_FAKE_COMPONENT")) printf("fake component instantiated\n"); + + return backend; + + out: return NULL; } static struct hwloc_disc_component hwloc_fake_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_MISC, /* so that it's always enabled when using the OS discovery */ "fake", + HWLOC_DISC_PHASE_TWEAK, 0, /* nothing to exclude */ hwloc_fake_component_instantiate, 100, /* make sure it's loaded before anything conflicting excludes it */ diff --git a/hwloc/topology-freebsd.c b/hwloc/topology-freebsd.c index c7f25d426a..a4a7147eb3 100644 --- a/hwloc/topology-freebsd.c +++ b/hwloc/topology-freebsd.c @@ -179,7 +179,7 @@ hwloc_freebsd_node_meminfo_info(struct hwloc_topology *topology) #endif static int -hwloc_look_freebsd(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_freebsd(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -189,6 +189,8 @@ hwloc_look_freebsd(struct hwloc_backend *backend, struct hwloc_disc_status *dsta struct hwloc_topology *topology = backend->topology; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (!topology->levels[0][0]->cpuset) { /* Nobody (even the x86 backend) created objects yet, setup basic objects */ int nbprocs = hwloc_fallback_nbprocessors(0); @@ -235,6 +237,7 @@ hwloc_set_freebsd_hooks(struct hwloc_binding_hooks *hooks __hwloc_attribute_unus static struct hwloc_backend * hwloc_freebsd_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -248,9 +251,9 @@ hwloc_freebsd_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_freebsd_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "freebsd", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_freebsd_component_instantiate, 50, 1, diff --git a/hwloc/topology-gl.c b/hwloc/topology-gl.c index d20c0ec839..8cef9b67a5 100644 --- a/hwloc/topology-gl.c +++ b/hwloc/topology-gl.c @@ -22,7 +22,7 @@ #define HWLOC_GL_SCREEN_MAX 10 static int -hwloc_gl_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_gl_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -35,6 +35,8 @@ hwloc_gl_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstat unsigned i; int err; + assert(dstatus->phase == HWLOC_DISC_PHASE_IO); + hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter); if (filter == HWLOC_TYPE_FILTER_KEEP_NONE) return 0; @@ -144,6 +146,7 @@ hwloc_gl_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstat static struct hwloc_backend * hwloc_gl_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -158,9 +161,9 @@ hwloc_gl_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_gl_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_MISC, "gl", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_IO, + HWLOC_DISC_PHASE_GLOBAL, hwloc_gl_component_instantiate, 10, /* after pci */ 1, diff --git a/hwloc/topology-hpux.c b/hwloc/topology-hpux.c index c8c22571c3..280900cbc9 100644 --- a/hwloc/topology-hpux.c +++ b/hwloc/topology-hpux.c @@ -175,7 +175,7 @@ hwloc_hpux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_node #endif /* MAP_MEM_FIRST_TOUCH */ static int -hwloc_look_hpux(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_hpux(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -190,6 +190,8 @@ hwloc_look_hpux(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus hwloc_obj_t *nodes, obj; int i, nbnodes = 0; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -299,6 +301,7 @@ hwloc_set_hpux_hooks(struct hwloc_binding_hooks *hooks, static struct hwloc_backend * hwloc_hpux_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -312,9 +315,9 @@ hwloc_hpux_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_hpux_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "hpux", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_hpux_component_instantiate, 50, 1, diff --git a/hwloc/topology-linux.c b/hwloc/topology-linux.c index 2aa6f97b8d..fd737e504a 100644 --- a/hwloc/topology-linux.c +++ b/hwloc/topology-linux.c @@ -57,6 +57,9 @@ struct hwloc_linux_backend_data_s { int is_knl; int is_amd_with_CU; int use_dt; + int use_numa_distances; + int use_numa_distances_for_cpuless; + int use_numa_initiators; struct utsname utsname; /* fields contain \0 when unknown */ int fallback_nbprocessors; /* only used in hwloc_linux_fallback_pu_level(), maybe be <= 0 (error) earlier */ unsigned pagesize; @@ -2238,8 +2241,10 @@ hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, const */ bufsize = hwloc_getpagesize()*4; buf = malloc(bufsize); - if (!buf) + if (!buf) { + endmntent(fd); return; + } while (getmntent_r(fd, &mntent, buf, bufsize)) { if (!strcmp(mntent.mnt_type, "cpuset")) { @@ -3987,11 +3992,20 @@ annotate_sysfsnode(struct hwloc_topology *topology, return 0; } - for(i=0, node=hwloc_get_next_obj_by_type(topology, 
HWLOC_OBJ_NUMANODE, NULL); - ios_index) { + nodes[i] = node; + break; + } + hwloc_get_sysfs_node_meminfo(data, path, node->os_index, &node->attr->numanode); } @@ -3999,8 +4013,10 @@ annotate_sysfsnode(struct hwloc_topology *topology, topology->support.discovery->numa_memory = 1; topology->support.discovery->disallowed_numa = 1; - if (nbnodes >= 2 && hwloc_parse_nodes_distances(path, nbnodes, indexes, distances, data->root_fd) < 0) { - hwloc_internal_distances_add(topology, nbnodes, nodes, distances, + if (nbnodes >= 2 + && data->use_numa_distances + && !hwloc_parse_nodes_distances(path, nbnodes, indexes, distances, data->root_fd)) { + hwloc_internal_distances_add(topology, "NUMALatency", nbnodes, nodes, distances, HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY, HWLOC_DISTANCES_ADD_FLAG_GROUP); } else { @@ -4166,6 +4182,7 @@ look_sysfsnode(struct hwloc_topology *topology, } } } + closedir(dir); } topology->support.discovery->numa = 1; @@ -4178,6 +4195,10 @@ look_sysfsnode(struct hwloc_topology *topology, /* failed to read/create some nodes, don't bother reading/fixing * a distance matrix that would likely be wrong anyway. 
*/ + data->use_numa_distances = 0; + } + + if (!data->use_numa_distances) { free(distances); distances = NULL; } @@ -4197,6 +4218,7 @@ look_sysfsnode(struct hwloc_topology *topology, hwloc_linux_knl_numa_quirk(topology, data, nodes, nbnodes, distances, &failednodes); free(distances); free(nodes); + free(trees); goto out; } } @@ -4212,7 +4234,8 @@ look_sysfsnode(struct hwloc_topology *topology, if (node && !hwloc_bitmap_iszero(node->cpuset)) { hwloc_obj_t tree; /* update from HMAT initiators if any */ - read_node_initiators(data, node, nbnodes, nodes, path); + if (data->use_numa_initiators) + read_node_initiators(data, node, nbnodes, nodes, path); tree = node; if (need_memcaches) @@ -4230,12 +4253,13 @@ look_sysfsnode(struct hwloc_topology *topology, if (node && hwloc_bitmap_iszero(node->cpuset)) { hwloc_obj_t tree; /* update from HMAT initiators if any */ - if (!read_node_initiators(data, node, nbnodes, nodes, path)) - if (!hwloc_bitmap_iszero(node->cpuset)) - goto fixed; + if (data->use_numa_initiators) + if (!read_node_initiators(data, node, nbnodes, nodes, path)) + if (!hwloc_bitmap_iszero(node->cpuset)) + goto fixed; /* if HMAT didn't help, try to find locality of CPU-less NUMA nodes by looking at their distances */ - if (distances) + if (distances && data->use_numa_distances_for_cpuless) fixup_cpuless_node_locality_from_distances(i, nbnodes, nodes, distances); fixed: @@ -4273,7 +4297,7 @@ look_sysfsnode(struct hwloc_topology *topology, /* Inserted distances now that nodes are properly inserted */ if (distances) - hwloc_internal_distances_add(topology, nbnodes, nodes, distances, + hwloc_internal_distances_add(topology, "NUMALatency", nbnodes, nodes, distances, HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY, HWLOC_DISTANCES_ADD_FLAG_GROUP); else @@ -4288,7 +4312,7 @@ look_sysfsnode(struct hwloc_topology *topology, static int look_sysfscpu(struct hwloc_topology *topology, struct hwloc_linux_backend_data_s *data, - const char *path, + const char 
*path, int old_filenames, struct hwloc_linux_cpuinfo_proc * cpuinfo_Lprocs, unsigned cpuinfo_numprocs) { hwloc_bitmap_t cpuset; /* Set of cpus for which we have topology information */ @@ -4297,8 +4321,7 @@ look_sysfscpu(struct hwloc_topology *topology, char str[CPU_TOPOLOGY_STR_LEN]; DIR *dir; int i,j; - unsigned caches_added, merge_buggy_core_siblings; - hwloc_obj_t packages = NULL; /* temporary list of packages before actual insert in the tree */ + unsigned caches_added; int threadwithcoreid = data->is_amd_with_CU ? -1 : 0; /* -1 means we don't know yet if threads have their own coreids within thread_siblings */ /* try to get the list of online CPUs at once. @@ -4365,16 +4388,20 @@ look_sysfscpu(struct hwloc_topology *topology, hwloc_debug_1arg_bitmap("found %d cpu topologies, cpuset %s\n", hwloc_bitmap_weight(cpuset), cpuset); - merge_buggy_core_siblings = (data->arch == HWLOC_LINUX_ARCH_X86); caches_added = 0; hwloc_bitmap_foreach_begin(i, cpuset) { - hwloc_bitmap_t packageset, coreset, bookset, drawerset, threadset; int tmpint; int notfirstofcore = 0; /* set if we have core info and if we're not the first PU of our core */ + int notfirstofdie = 0; /* set if we have die info and if we're not the first PU of our die */ + hwloc_bitmap_t dieset = NULL; if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { /* look at the core */ - sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i); + hwloc_bitmap_t coreset; + if (old_filenames) + sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i); + else + sprintf(str, "%s/cpu%d/topology/core_cpus", path, i); coreset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); if (coreset) { unsigned mycoreid = (unsigned) -1; @@ -4428,12 +4455,38 @@ look_sysfscpu(struct hwloc_topology *topology, } if (!notfirstofcore /* don't look at the package unless we are the first of the core */ + && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) { + /* look at the die */ + sprintf(str, 
"%s/cpu%d/topology/die_cpus", path, i); + dieset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); + if (dieset) { + hwloc_bitmap_and(dieset, dieset, cpuset); + if (hwloc_bitmap_first(dieset) != i) { + /* not first cpu in this die, ignore the die */ + hwloc_bitmap_free(dieset); + dieset = NULL; + notfirstofdie = 1; + } + /* look at packages before deciding whether we keep that die or not */ + } + } + + if (!notfirstofdie /* don't look at the package unless we are the first of the die */ && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) { /* look at the package */ - sprintf(str, "%s/cpu%d/topology/core_siblings", path, i); + hwloc_bitmap_t packageset; + if (old_filenames) + sprintf(str, "%s/cpu%d/topology/core_siblings", path, i); + else + sprintf(str, "%s/cpu%d/topology/package_cpus", path, i); packageset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); if (packageset) { hwloc_bitmap_and(packageset, packageset, cpuset); + if (dieset && hwloc_bitmap_isequal(packageset, dieset)) { + /* die is identical to package, ignore it */ + hwloc_bitmap_free(dieset); + dieset = NULL; + } if (hwloc_bitmap_first(packageset) == i) { /* first cpu in this package, add the package */ struct hwloc_obj *package; @@ -4443,44 +4496,6 @@ look_sysfscpu(struct hwloc_topology *topology, if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0) mypackageid = (unsigned) tmpint; - if (merge_buggy_core_siblings) { - /* check for another package with same physical_package_id */ - hwloc_obj_t curpackage = packages; - while (curpackage) { - if (curpackage->os_index == mypackageid) { - /* found another package with same physical_package_id but different core_siblings. - * looks like a buggy kernel on Intel Xeon E5 v3 processor with two rings. - * merge these core_siblings to extend the existing first package object. 
- */ - static int reported = 0; - if (!reported && !hwloc_hide_errors()) { - char *a, *b; - hwloc_bitmap_asprintf(&a, curpackage->cpuset); - hwloc_bitmap_asprintf(&b, packageset); - fprintf(stderr, "****************************************************************************\n"); - fprintf(stderr, "* hwloc %s has detected buggy sysfs package information: Two packages have\n", HWLOC_VERSION); - fprintf(stderr, "* the same physical package id %u but different core_siblings %s and %s\n", - mypackageid, a, b); - fprintf(stderr, "* hwloc is merging these packages into a single one assuming your Linux kernel\n"); - fprintf(stderr, "* does not support this processor correctly.\n"); - fprintf(stderr, "* You may hide this warning by setting HWLOC_HIDE_ERRORS=1 in the environment.\n"); - fprintf(stderr, "*\n"); - fprintf(stderr, "* If hwloc does not report the right number of packages,\n"); - fprintf(stderr, "* please report this error message to the hwloc user's mailing list,\n"); - fprintf(stderr, "* along with the files generated by the hwloc-gather-topology script.\n"); - fprintf(stderr, "****************************************************************************\n"); - reported = 1; - free(a); - free(b); - } - hwloc_bitmap_or(curpackage->cpuset, curpackage->cpuset, packageset); - goto package_done; - } - curpackage = curpackage->next_cousin; - } - } - - /* no package with same physical_package_id, create a new one */ package = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, mypackageid); package->cpuset = packageset; hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n", @@ -4493,22 +4508,32 @@ look_sysfscpu(struct hwloc_topology *topology, &cpuinfo_Lprocs[j].infos, &cpuinfo_Lprocs[j].infos_count); } } - /* insert in a temporary list in case we have to modify the cpuset by merging other core_siblings later. - * we'll actually insert the tree at the end of the entire sysfs cpu loop. 
- */ - package->next_cousin = packages; - packages = package; - + hwloc_insert_object_by_cpuset(topology, package); packageset = NULL; /* don't free it */ } - package_done: hwloc_bitmap_free(packageset); } } + if (dieset) { + struct hwloc_obj *die; + unsigned mydieid; + mydieid = (unsigned) -1; + sprintf(str, "%s/cpu%d/topology/die_id", path, i); /* contains %d when added in 5.2 */ + if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0) + mydieid = (unsigned) tmpint; + + die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_DIE, mydieid); + die->cpuset = dieset; + hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n", + mydieid, dieset); + hwloc_insert_object_by_cpuset(topology, die); + } + if (data->arch == HWLOC_LINUX_ARCH_S390 && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { /* look at the books */ + hwloc_bitmap_t bookset, drawerset; sprintf(str, "%s/cpu%d/topology/book_siblings", path, i); bookset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); if (bookset) { @@ -4565,6 +4590,7 @@ look_sysfscpu(struct hwloc_topology *topology, /* PU cannot be filtered-out */ { /* look at the thread */ + hwloc_bitmap_t threadset; struct hwloc_obj *thread = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, (unsigned) i); threadset = hwloc_bitmap_alloc(); hwloc_bitmap_only(threadset, i); @@ -4585,7 +4611,10 @@ look_sysfscpu(struct hwloc_topology *topology, if (hwloc_bitmap_iszero(cacheset)) { /* ia64 returning empty L3 and L2i? 
use the core set instead */ hwloc_bitmap_t tmpset; - sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i); + if (old_filenames) + sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i); + else + sprintf(str, "%s/cpu%d/topology/core_cpus", path, i); tmpset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd); /* only use it if we actually got something */ if (tmpset) { @@ -4686,14 +4715,6 @@ look_sysfscpu(struct hwloc_topology *topology, } hwloc_bitmap_foreach_end(); - /* actually insert in the tree now that package cpusets have been fixed-up */ - while (packages) { - hwloc_obj_t next = packages->next_cousin; - packages->next_cousin = NULL; - hwloc_insert_object_by_cpuset(topology, packages); - packages = next; - } - if (0 == caches_added && data->use_dt) look_powerpc_device_tree(topology, data); @@ -5033,24 +5054,6 @@ hwloc_linux_free_cpuinfo(struct hwloc_linux_cpuinfo_proc * Lprocs, unsigned nump ****** Main Topology Discovery ****** *************************************/ -static void -hwloc__linux_get_mic_sn(struct hwloc_topology *topology, struct hwloc_linux_backend_data_s *data) -{ - char line[64], *tmp, *end; - if (hwloc_read_path_by_length("/proc/elog", line, sizeof(line), data->root_fd) < 0) - return; - if (strncmp(line, "Card ", 5)) - return; - tmp = line + 5; - end = strchr(tmp, ':'); - if (!end) - return; - *end = '\0'; - - if (tmp[0]) - hwloc_obj_add_info(hwloc_get_root_obj(topology), "MICSerialNumber", tmp); -} - static void hwloc_gather_system_info(struct hwloc_topology *topology, struct hwloc_linux_backend_data_s *data) @@ -5229,8 +5232,52 @@ hwloc_linux_fallback_pu_level(struct hwloc_backend *backend) hwloc_setup_pu_level(topology, data->fallback_nbprocessors); } +static const char *find_sysfs_cpu_path(int root_fd, int *old_filenames) +{ + if (!hwloc_access("/sys/bus/cpu/devices", R_OK|X_OK, root_fd)) { + if (!hwloc_access("/sys/bus/cpu/devices/cpu0/topology/package_cpus", R_OK, root_fd) + || 
!hwloc_access("/sys/bus/cpu/devices/cpu0/topology/core_cpus", R_OK, root_fd)) { + return "/sys/bus/cpu/devices"; + } + + if (!hwloc_access("/sys/bus/cpu/devices/cpu0/topology/core_siblings", R_OK, root_fd) + || !hwloc_access("/sys/bus/cpu/devices/cpu0/topology/thread_siblings", R_OK, root_fd)) { + *old_filenames = 1; + return "/sys/bus/cpu/devices"; + } + } + + if (!hwloc_access("/sys/devices/system/cpu", R_OK|X_OK, root_fd)) { + if (!hwloc_access("/sys/devices/system/cpu/cpu0/topology/package_cpus", R_OK, root_fd) + || !hwloc_access("/sys/devices/system/cpu/cpu0/topology/core_cpus", R_OK, root_fd)) { + return "/sys/devices/system/cpu"; + } + + if (!hwloc_access("/sys/devices/system/cpu/cpu0/topology/core_siblings", R_OK, root_fd) + || !hwloc_access("/sys/devices/system/cpu/cpu0/topology/thread_siblings", R_OK, root_fd)) { + *old_filenames = 1; + return "/sys/devices/system/cpu"; + } + } + + return NULL; +} + +static const char *find_sysfs_node_path(int root_fd) +{ + if (!hwloc_access("/sys/bus/node/devices", R_OK|X_OK, root_fd) + && !hwloc_access("/sys/bus/node/devices/node0/cpumap", R_OK, root_fd)) + return "/sys/bus/node/devices"; + + if (!hwloc_access("/sys/devices/system/node", R_OK|X_OK, root_fd) + && !hwloc_access("/sys/devices/system/node/node0/cpumap", R_OK, root_fd)) + return "/sys/devices/system/node"; + + return NULL; +} + static int -hwloc_look_linuxfs(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_linuxfs_look_cpu(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend may be used with topology->is_thissystem set (default) @@ -5249,31 +5296,18 @@ hwloc_look_linuxfs(struct hwloc_backend *backend, struct hwloc_disc_status *dsta int already_numanodes; const char *sysfs_cpu_path; const char *sysfs_node_path; + int old_siblings_filenames = 0; int err; /* look for sysfs cpu path containing at least one of core_siblings and thread_siblings */ - if 
(!hwloc_access("/sys/bus/cpu/devices", R_OK|X_OK, data->root_fd) - && (!hwloc_access("/sys/bus/cpu/devices/cpu0/topology/thread_siblings", R_OK, data->root_fd) - || !hwloc_access("/sys/bus/cpu/devices/cpu0/topology/core_siblings", R_OK, data->root_fd))) - sysfs_cpu_path = "/sys/bus/cpu/devices"; - else if (!hwloc_access("/sys/devices/system/cpu", R_OK|X_OK, data->root_fd) - && (!hwloc_access("/sys/devices/system/cpu/cpu0/topology/core_siblings", R_OK, data->root_fd) - || !hwloc_access("/sys/devices/system/cpu/cpu0/topology/thread_siblings", R_OK, data->root_fd))) - sysfs_cpu_path = "/sys/devices/system/cpu"; - else - sysfs_cpu_path = NULL; - hwloc_debug("Found sysfs cpu files under %s\n", sysfs_cpu_path); + sysfs_cpu_path = find_sysfs_cpu_path(data->root_fd, &old_siblings_filenames); + hwloc_debug("Found sysfs cpu files under %s with %s topology filenames\n", + sysfs_cpu_path, old_siblings_filenames ? "old" : "new"); /* look for sysfs node path */ - if (!hwloc_access("/sys/bus/node/devices", R_OK|X_OK, data->root_fd) - && !hwloc_access("/sys/bus/node/devices/node0/cpumap", R_OK, data->root_fd)) - sysfs_node_path = "/sys/bus/node/devices"; - else if (!hwloc_access("/sys/devices/system/node", R_OK|X_OK, data->root_fd) - && !hwloc_access("/sys/devices/system/node/node0/cpumap", R_OK, data->root_fd)) - sysfs_node_path = "/sys/devices/system/node"; - else - sysfs_node_path = NULL; - hwloc_debug("Found sysfs node files under %s\n", sysfs_node_path); + sysfs_node_path = find_sysfs_node_path(data->root_fd); + hwloc_debug("Found sysfs node files under %s\n", + sysfs_node_path); already_pus = (topology->levels[0][0]->complete_cpuset != NULL && !hwloc_bitmap_iszero(topology->levels[0][0]->complete_cpuset)); @@ -5364,7 +5398,7 @@ hwloc_look_linuxfs(struct hwloc_backend *backend, struct hwloc_disc_status *dsta } else { /* sysfs */ - if (look_sysfscpu(topology, data, sysfs_cpu_path, Lprocs, numprocs) < 0) + if (look_sysfscpu(topology, data, sysfs_cpu_path, 
old_siblings_filenames, Lprocs, numprocs) < 0) /* sysfs but we failed to read cpu topology, fallback */ hwloc_linux_fallback_pu_level(backend); } @@ -5400,8 +5434,6 @@ hwloc_look_linuxfs(struct hwloc_backend *backend, struct hwloc_disc_status *dsta free(cpuset_name); } - hwloc__linux_get_mic_sn(topology, data); - /* data->utsname was filled with real uname or \0, we can safely pass it */ hwloc_add_uname_info(topology, &data->utsname); @@ -5437,147 +5469,10 @@ hwloc_linux_backend_get_pci_busid_cpuset(struct hwloc_backend *backend, -/******************************* - ******* Linux component ******* - *******************************/ - -static void -hwloc_linux_backend_disable(struct hwloc_backend *backend) -{ - struct hwloc_linux_backend_data_s *data = backend->private_data; -#ifdef HAVE_OPENAT - free(data->root_path); - close(data->root_fd); -#endif -#ifdef HWLOC_HAVE_LIBUDEV - if (data->udev) - udev_unref(data->udev); -#endif - free(data); -} - -static struct hwloc_backend * -hwloc_linux_component_instantiate(struct hwloc_topology *topology, - struct hwloc_disc_component *component, - const void *_data1 __hwloc_attribute_unused, - const void *_data2 __hwloc_attribute_unused, - const void *_data3 __hwloc_attribute_unused) -{ - struct hwloc_backend *backend; - struct hwloc_linux_backend_data_s *data; - const char * fsroot_path; - int root = -1; - char *env; - - backend = hwloc_backend_alloc(topology, component); - if (!backend) - goto out; - - data = malloc(sizeof(*data)); - if (!data) { - errno = ENOMEM; - goto out_with_backend; - } - - backend->private_data = data; - backend->discover = hwloc_look_linuxfs; - backend->get_pci_busid_cpuset = hwloc_linux_backend_get_pci_busid_cpuset; - backend->disable = hwloc_linux_backend_disable; - - /* default values */ - data->arch = HWLOC_LINUX_ARCH_UNKNOWN; - data->is_knl = 0; - data->is_amd_with_CU = 0; - data->use_dt = 0; - data->is_real_fsroot = 1; - data->root_path = NULL; - fsroot_path = getenv("HWLOC_FSROOT"); - if 
(!fsroot_path) - fsroot_path = "/"; - - if (strcmp(fsroot_path, "/")) { -#ifdef HAVE_OPENAT - int flags; - - root = open(fsroot_path, O_RDONLY | O_DIRECTORY); - if (root < 0) - goto out_with_data; - - backend->is_thissystem = 0; - data->is_real_fsroot = 0; - data->root_path = strdup(fsroot_path); - - /* Since this fd stays open after hwloc returns, mark it as - close-on-exec so that children don't inherit it. Stevens says - that we should GETFD before we SETFD, so we do. */ - flags = fcntl(root, F_GETFD, 0); - if (-1 == flags || - -1 == fcntl(root, F_SETFD, FD_CLOEXEC | flags)) { - close(root); - root = -1; - goto out_with_data; - } -#else - fprintf(stderr, "Cannot change Linux fsroot without openat() support.\n"); - errno = ENOSYS; - goto out_with_data; -#endif - } - data->root_fd = root; - -#ifdef HWLOC_HAVE_LIBUDEV - data->udev = NULL; - if (data->is_real_fsroot) { - data->udev = udev_new(); - } -#endif - - data->dumped_hwdata_dirname = getenv("HWLOC_DUMPED_HWDATA_DIR"); - if (!data->dumped_hwdata_dirname) - data->dumped_hwdata_dirname = (char *) RUNSTATEDIR "/hwloc/"; - - env = getenv("HWLOC_USE_DT"); - if (env) - data->use_dt = atoi(env); - - return backend; - - out_with_data: -#ifdef HAVE_OPENAT - free(data->root_path); -#endif - free(data); - out_with_backend: - free(backend); - out: - return NULL; -} - -static struct hwloc_disc_component hwloc_linux_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, - "linux", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, - hwloc_linux_component_instantiate, - 50, - 1, - NULL -}; - -const struct hwloc_component hwloc_linux_component = { - HWLOC_COMPONENT_ABI, - NULL, NULL, - HWLOC_COMPONENT_TYPE_DISC, - 0, - &hwloc_linux_disc_component -}; - - - - #ifdef HWLOC_HAVE_LINUXIO /*********************************** - ******* Linux I/O component ******* + ******* Linux I/O discovery ******* ***********************************/ #define HWLOC_LINUXFS_OSDEV_FLAG_FIND_VIRTUAL (1U<<0) @@ -6034,6 +5929,7 @@ hwloc_linuxfs_net_class_fillinfos(int 
root_fd, struct stat st; char path[296]; /* osdevpath <= 256 */ char address[128]; + int err; snprintf(path, sizeof(path), "%s/address", osdevpath); if (!hwloc_read_path_by_length(path, address, sizeof(address), root_fd)) { char *eol = strchr(address, '\n'); @@ -6044,8 +5940,14 @@ hwloc_linuxfs_net_class_fillinfos(int root_fd, snprintf(path, sizeof(path), "%s/device/infiniband", osdevpath); if (!hwloc_stat(path, &st, root_fd)) { char hexid[16]; - snprintf(path, sizeof(path), "%s/dev_id", osdevpath); - if (!hwloc_read_path_by_length(path, hexid, sizeof(hexid), root_fd)) { + snprintf(path, sizeof(path), "%s/dev_port", osdevpath); + err = hwloc_read_path_by_length(path, hexid, sizeof(hexid), root_fd); + if (err < 0) { + /* fallback to dev_id for old kernels/drivers */ + snprintf(path, sizeof(path), "%s/dev_id", osdevpath); + err = hwloc_read_path_by_length(path, hexid, sizeof(hexid), root_fd); + } + if (!err) { char *eoid; unsigned long port; port = strtoul(hexid, &eoid, 0); @@ -6216,95 +6118,6 @@ hwloc_linuxfs_lookup_infiniband_class(struct hwloc_backend *backend, unsigned os return 0; } -static void -hwloc_linuxfs_mic_class_fillinfos(int root_fd, - struct hwloc_obj *obj, const char *osdevpath) -{ - char path[296]; /* osdevpath <= 256 */ - char family[64]; - char sku[64]; - char sn[64]; - char string[21]; - - obj->subtype = strdup("MIC"); - - snprintf(path, sizeof(path), "%s/family", osdevpath); - if (!hwloc_read_path_by_length(path, family, sizeof(family), root_fd)) { - char *eol = strchr(family, '\n'); - if (eol) - *eol = 0; - hwloc_obj_add_info(obj, "MICFamily", family); - } - - snprintf(path, sizeof(path), "%s/sku", osdevpath); - if (!hwloc_read_path_by_length(path, sku, sizeof(sku), root_fd)) { - char *eol = strchr(sku, '\n'); - if (eol) - *eol = 0; - hwloc_obj_add_info(obj, "MICSKU", sku); - } - - snprintf(path, sizeof(path), "%s/serialnumber", osdevpath); - if (!hwloc_read_path_by_length(path, sn, sizeof(sn), root_fd)) { - char *eol; - eol = strchr(sn, '\n'); - 
if (eol) - *eol = 0; - hwloc_obj_add_info(obj, "MICSerialNumber", sn); - } - - snprintf(path, sizeof(path), "%s/active_cores", osdevpath); - if (!hwloc_read_path_by_length(path, string, sizeof(string), root_fd)) { - unsigned long count = strtoul(string, NULL, 16); - snprintf(string, sizeof(string), "%lu", count); - hwloc_obj_add_info(obj, "MICActiveCores", string); - } - - snprintf(path, sizeof(path), "%s/memsize", osdevpath); - if (!hwloc_read_path_by_length(path, string, sizeof(string), root_fd)) { - unsigned long count = strtoul(string, NULL, 16); - snprintf(string, sizeof(string), "%lu", count); - hwloc_obj_add_info(obj, "MICMemorySize", string); - } -} - -static int -hwloc_linuxfs_lookup_mic_class(struct hwloc_backend *backend, unsigned osdev_flags) -{ - struct hwloc_linux_backend_data_s *data = backend->private_data; - int root_fd = data->root_fd; - unsigned idx; - DIR *dir; - struct dirent *dirent; - - dir = hwloc_opendir("/sys/class/mic", root_fd); - if (!dir) - return 0; - - while ((dirent = readdir(dir)) != NULL) { - char path[256]; - hwloc_obj_t obj, parent; - - if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, "..")) - continue; - if (sscanf(dirent->d_name, "mic%u", &idx) != 1) - continue; - - snprintf(path, sizeof(path), "/sys/class/mic/mic%u", idx); - parent = hwloc_linuxfs_find_osdev_parent(backend, root_fd, path, osdev_flags); - if (!parent) - continue; - - obj = hwloc_linux_add_os_device(backend, parent, HWLOC_OBJ_OSDEV_COPROC, dirent->d_name); - - hwloc_linuxfs_mic_class_fillinfos(root_fd, obj, path); - } - - closedir(dir); - - return 0; -} - static int hwloc_linuxfs_lookup_drm_class(struct hwloc_backend *backend, unsigned osdev_flags) { @@ -6808,9 +6621,10 @@ hwloc_linuxfs_pci_look_pcislots(struct hwloc_backend *backend) return 0; } #endif /* HWLOC_HAVE_LINUXPCI */ +#endif /* HWLOC_HAVE_LINUXIO */ static int -hwloc_look_linuxfs_io(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) 
+hwloc_look_linuxfs(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend may be used with topology->is_thissystem set (default) @@ -6818,53 +6632,41 @@ hwloc_look_linuxfs_io(struct hwloc_backend *backend, struct hwloc_disc_status *d */ struct hwloc_topology *topology = backend->topology; - struct hwloc_linux_backend_data_s *data = NULL; - struct hwloc_backend *tmpbackend; +#ifdef HWLOC_HAVE_LINUXIO enum hwloc_type_filter_e pfilter, bfilter, ofilter, mfilter; - int root_fd = -1; +#endif /* HWLOC_HAVE_LINUXIO */ + + if (dstatus->phase == HWLOC_DISC_PHASE_CPU) { + hwloc_linuxfs_look_cpu(backend, dstatus); + return 0; + } +#ifdef HWLOC_HAVE_LINUXIO hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter); hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter); hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &ofilter); hwloc_topology_get_type_filter(topology, HWLOC_OBJ_MISC, &mfilter); - if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE - && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE - && ofilter == HWLOC_TYPE_FILTER_KEEP_NONE - && mfilter == HWLOC_TYPE_FILTER_KEEP_NONE) - return 0; - - /* hackily find the linux backend to steal its private_data (for fsroot) */ - tmpbackend = topology->backends; - while (tmpbackend) { - if (tmpbackend->component == &hwloc_linux_disc_component) { - data = tmpbackend->private_data; - break; - } - tmpbackend = tmpbackend->next; - } - if (!data) { - hwloc_debug("linuxio failed to find linux backend private_data, aborting its discovery()\n"); - return -1; - } - backend->private_data = data; - root_fd = data->root_fd; - hwloc_debug("linuxio backend stole linux backend root_fd %d\n", root_fd); - if (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE - || pfilter != HWLOC_TYPE_FILTER_KEEP_NONE) { + if (dstatus->phase == HWLOC_DISC_PHASE_PCI + && (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE + || pfilter != HWLOC_TYPE_FILTER_KEEP_NONE)) { #ifdef HWLOC_HAVE_LINUXPCI - if (dstatus->flags & 
HWLOC_DISC_STATUS_FLAG_PCI_DONE) { - hwloc_debug("%s", "PCI discovery has already been performed, skipping PCI in linuxio backend.\n"); - } else { hwloc_linuxfs_pci_look_pcidevices(backend); - dstatus->flags |= HWLOC_DISC_STATUS_FLAG_PCI_DONE; + /* no need to run another PCI phase */ + dstatus->excluded_phases |= HWLOC_DISC_PHASE_PCI; +#endif /* HWLOC_HAVE_LINUXPCI */ } - hwloc_linuxfs_pci_look_pcislots(backend); + if (dstatus->phase == HWLOC_DISC_PHASE_ANNOTATE + && (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE + || pfilter != HWLOC_TYPE_FILTER_KEEP_NONE)) { +#ifdef HWLOC_HAVE_LINUXPCI + hwloc_linuxfs_pci_look_pcislots(backend); #endif /* HWLOC_HAVE_LINUXPCI */ } - if (ofilter != HWLOC_TYPE_FILTER_KEEP_NONE) { + if (dstatus->phase == HWLOC_DISC_PHASE_IO + && ofilter != HWLOC_TYPE_FILTER_KEEP_NONE) { unsigned osdev_flags = 0; if (getenv("HWLOC_VIRTUAL_LINUX_OSDEV")) osdev_flags |= HWLOC_LINUXFS_OSDEV_FLAG_FIND_VIRTUAL; @@ -6875,60 +6677,165 @@ hwloc_look_linuxfs_io(struct hwloc_backend *backend, struct hwloc_disc_status *d hwloc_linuxfs_lookup_dax_class(backend, osdev_flags); hwloc_linuxfs_lookup_net_class(backend, osdev_flags); hwloc_linuxfs_lookup_infiniband_class(backend, osdev_flags); - hwloc_linuxfs_lookup_mic_class(backend, osdev_flags); - if (ofilter != HWLOC_TYPE_FILTER_KEEP_IMPORTANT) { - hwloc_linuxfs_lookup_drm_class(backend, osdev_flags); - hwloc_linuxfs_lookup_dma_class(backend, osdev_flags); - } + if (ofilter != HWLOC_TYPE_FILTER_KEEP_IMPORTANT) { + hwloc_linuxfs_lookup_drm_class(backend, osdev_flags); + hwloc_linuxfs_lookup_dma_class(backend, osdev_flags); + } } - if (mfilter != HWLOC_TYPE_FILTER_KEEP_NONE) { - hwloc__get_firmware_dmi_memory_info(topology, data); + + if (dstatus->phase == HWLOC_DISC_PHASE_MISC + && mfilter != HWLOC_TYPE_FILTER_KEEP_NONE) { + hwloc__get_firmware_dmi_memory_info(topology, backend->private_data); } +#endif /* HWLOC_HAVE_LINUXIO */ return 0; } +/******************************* + ******* Linux component ******* + 
*******************************/ + +static void +hwloc_linux_backend_disable(struct hwloc_backend *backend) +{ + struct hwloc_linux_backend_data_s *data = backend->private_data; +#ifdef HAVE_OPENAT + if (data->root_fd >= 0) { + free(data->root_path); + close(data->root_fd); + } +#endif +#ifdef HWLOC_HAVE_LIBUDEV + if (data->udev) + udev_unref(data->udev); +#endif + free(data); +} + static struct hwloc_backend * -hwloc_linuxio_component_instantiate(struct hwloc_topology *topology, - struct hwloc_disc_component *component, - const void *_data1 __hwloc_attribute_unused, - const void *_data2 __hwloc_attribute_unused, - const void *_data3 __hwloc_attribute_unused) +hwloc_linux_component_instantiate(struct hwloc_topology *topology, + struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, + const void *_data1 __hwloc_attribute_unused, + const void *_data2 __hwloc_attribute_unused, + const void *_data3 __hwloc_attribute_unused) { struct hwloc_backend *backend; + struct hwloc_linux_backend_data_s *data; + const char * fsroot_path; + int root = -1; + char *env; backend = hwloc_backend_alloc(topology, component); if (!backend) - return NULL; - backend->discover = hwloc_look_linuxfs_io; + goto out; - /* backend->is_thissystem should be what the linux backend has, - * but it's actually useless since both backends will change the main topology->is_thissystem in the same way. 
- */ + data = malloc(sizeof(*data)); + if (!data) { + errno = ENOMEM; + goto out_with_backend; + } + + backend->private_data = data; + backend->discover = hwloc_look_linuxfs; + backend->get_pci_busid_cpuset = hwloc_linux_backend_get_pci_busid_cpuset; + backend->disable = hwloc_linux_backend_disable; + + /* default values */ + data->arch = HWLOC_LINUX_ARCH_UNKNOWN; + data->is_knl = 0; + data->is_amd_with_CU = 0; + data->use_dt = 0; + data->is_real_fsroot = 1; + data->root_path = NULL; + fsroot_path = getenv("HWLOC_FSROOT"); + if (!fsroot_path) + fsroot_path = "/"; + + if (strcmp(fsroot_path, "/")) { +#ifdef HAVE_OPENAT + int flags; + + root = open(fsroot_path, O_RDONLY | O_DIRECTORY); + if (root < 0) + goto out_with_data; + + backend->is_thissystem = 0; + data->is_real_fsroot = 0; + data->root_path = strdup(fsroot_path); + + /* Since this fd stays open after hwloc returns, mark it as + close-on-exec so that children don't inherit it. Stevens says + that we should GETFD before we SETFD, so we do. 
*/ + flags = fcntl(root, F_GETFD, 0); + if (-1 == flags || + -1 == fcntl(root, F_SETFD, FD_CLOEXEC | flags)) { + close(root); + root = -1; + goto out_with_data; + } +#else + fprintf(stderr, "Cannot change Linux fsroot without openat() support.\n"); + errno = ENOSYS; + goto out_with_data; +#endif + } + data->root_fd = root; + +#ifdef HWLOC_HAVE_LIBUDEV + data->udev = NULL; + if (data->is_real_fsroot) { + data->udev = udev_new(); + } +#endif + + data->dumped_hwdata_dirname = getenv("HWLOC_DUMPED_HWDATA_DIR"); + if (!data->dumped_hwdata_dirname) + data->dumped_hwdata_dirname = (char *) RUNSTATEDIR "/hwloc/"; + + data->use_numa_distances = 1; + data->use_numa_distances_for_cpuless = 1; + data->use_numa_initiators = 1; + env = getenv("HWLOC_USE_NUMA_DISTANCES"); + if (env) { + unsigned val = atoi(env); + data->use_numa_distances = !!(val & 3); /* 2 implies 1 */ + data->use_numa_distances_for_cpuless = !!(val & 2); + data->use_numa_initiators = !!(val & 4); + } + + env = getenv("HWLOC_USE_DT"); + if (env) + data->use_dt = atoi(env); - /* backend->private_data will point to the main linux private_data after load(), - * once the main linux component is instantiated for sure. - * it remains valid until the main linux component gets disabled during topology destroy. 
- */ return backend; + + out_with_data: +#ifdef HAVE_OPENAT + free(data->root_path); +#endif + free(data); + out_with_backend: + free(backend); + out: + return NULL; } -static struct hwloc_disc_component hwloc_linuxio_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_MISC, - "linuxio", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, - hwloc_linuxio_component_instantiate, - 19, /* after pci */ +static struct hwloc_disc_component hwloc_linux_disc_component = { + "linux", + HWLOC_DISC_PHASE_CPU | HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE, + HWLOC_DISC_PHASE_GLOBAL, + hwloc_linux_component_instantiate, + 50, 1, NULL }; -const struct hwloc_component hwloc_linuxio_component = { +const struct hwloc_component hwloc_linux_component = { HWLOC_COMPONENT_ABI, NULL, NULL, HWLOC_COMPONENT_TYPE_DISC, 0, - &hwloc_linuxio_disc_component + &hwloc_linux_disc_component }; - -#endif /* HWLOC_HAVE_LINUXIO */ diff --git a/hwloc/topology-netbsd.c b/hwloc/topology-netbsd.c index 1c8d37e3ae..96921d85a5 100644 --- a/hwloc/topology-netbsd.c +++ b/hwloc/topology-netbsd.c @@ -146,7 +146,7 @@ hwloc_netbsd_node_meminfo_info(struct hwloc_topology *topology) #endif static int -hwloc_look_netbsd(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_netbsd(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -156,6 +156,8 @@ hwloc_look_netbsd(struct hwloc_backend *backend, struct hwloc_disc_status *dstat struct hwloc_topology *topology = backend->topology; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (!topology->levels[0][0]->cpuset) { /* Nobody (even the x86 backend) created objects yet, setup basic objects */ int nbprocs = hwloc_fallback_nbprocessors(0); @@ -194,6 +196,7 @@ hwloc_set_netbsd_hooks(struct hwloc_binding_hooks *hooks, static struct hwloc_backend * hwloc_netbsd_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -207,9 +210,9 @@ hwloc_netbsd_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_netbsd_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "netbsd", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_netbsd_component_instantiate, 50, 1, diff --git a/hwloc/topology-noos.c b/hwloc/topology-noos.c index aff9dafbe5..174b6fd8c2 100644 --- a/hwloc/topology-noos.c +++ b/hwloc/topology-noos.c @@ -11,7 +11,7 @@ #include "private/private.h" static int -hwloc_look_noos(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_noos(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -22,6 +22,8 @@ hwloc_look_noos(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus struct hwloc_topology *topology = backend->topology; int nbprocs; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -41,6 +43,7 @@ hwloc_look_noos(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus static struct hwloc_backend * hwloc_noos_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -54,9 +57,9 @@ hwloc_noos_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_noos_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "no_os", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_noos_component_instantiate, 40, /* lower than native OS component, higher than globals */ 1, diff --git a/hwloc/topology-nvml.c b/hwloc/topology-nvml.c index 64943b07a2..bc1efdf054 100644 --- a/hwloc/topology-nvml.c +++ b/hwloc/topology-nvml.c @@ -14,7 +14,7 @@ #include static int -hwloc_nvml_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_nvml_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -27,6 +27,8 @@ hwloc_nvml_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dst nvmlReturn_t ret; unsigned nb, i; + assert(dstatus->phase == HWLOC_DISC_PHASE_IO); + hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter); if (filter == HWLOC_TYPE_FILTER_KEEP_NONE) return 0; @@ -106,6 +108,7 @@ hwloc_nvml_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dst static struct hwloc_backend * hwloc_nvml_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -120,9 +123,9 @@ hwloc_nvml_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_nvml_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_MISC, "nvml", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_IO, + HWLOC_DISC_PHASE_GLOBAL, hwloc_nvml_component_instantiate, 5, /* after pci, and after cuda since likely less useful */ 1, diff --git a/hwloc/topology-opencl.c b/hwloc/topology-opencl.c index e16314c630..146e867513 100644 --- a/hwloc/topology-opencl.c +++ b/hwloc/topology-opencl.c @@ -35,7 +35,7 @@ static int -hwloc_opencl_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_opencl_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -50,6 +50,8 @@ hwloc_opencl_discover(struct hwloc_backend *backend, struct hwloc_disc_status *d cl_int clret; unsigned j; + assert(dstatus->phase == HWLOC_DISC_PHASE_IO); + hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter); if (filter == HWLOC_TYPE_FILTER_KEEP_NONE) return 0; @@ -176,6 +178,7 @@ hwloc_opencl_discover(struct hwloc_backend *backend, struct hwloc_disc_status *d static struct hwloc_backend * hwloc_opencl_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -190,9 +193,9 @@ hwloc_opencl_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_opencl_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_MISC, "opencl", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_IO, + HWLOC_DISC_PHASE_GLOBAL, hwloc_opencl_component_instantiate, 10, /* after pci */ 1, diff --git a/hwloc/topology-pci.c b/hwloc/topology-pci.c index 7b54f6eb12..0ed45f9fc6 100644 --- a/hwloc/topology-pci.c +++ b/hwloc/topology-pci.c @@ -87,6 +87,48 @@ static pthread_mutex_t hwloc_pciaccess_mutex = PTHREAD_MUTEX_INITIALIZER; #error No mutex implementation available #endif +static void +hwloc_pci_get_obj_names(hwloc_obj_t obj, struct pci_id_match *m) +{ + const char *vendorname, *devicename; + m->vendor_id = obj->attr->pcidev.vendor_id; + m->device_id = obj->attr->pcidev.device_id; + pci_get_strings(m, &devicename, &vendorname, NULL, NULL); + if (vendorname && *vendorname) + hwloc_obj_add_info(obj, "PCIVendor", vendorname); + if (devicename && *devicename) + hwloc_obj_add_info(obj, "PCIDevice", devicename); +} + +static void +hwloc_pci_get_names(hwloc_topology_t topology) +{ + hwloc_obj_t obj; + struct pci_id_match m; + + /* we need the lists of PCI and bridges */ + 
hwloc_topology_reconnect(topology, 0); + + m.subvendor_id = PCI_MATCH_ANY; + m.subdevice_id = PCI_MATCH_ANY; + m.device_class = 0; + m.device_class_mask = 0; + m.match_data = 0; + + HWLOC_PCIACCESS_LOCK(); + + obj = NULL; + while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) + hwloc_pci_get_obj_names(obj, &m); + + obj = NULL; + while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL) + if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI) + hwloc_pci_get_obj_names(obj, &m); + + HWLOC_PCIACCESS_UNLOCK(); +} + static int hwloc_look_pci(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { @@ -102,6 +144,13 @@ hwloc_look_pci(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) int ret; struct pci_device_iterator *iter; struct pci_device *pcidev; + struct pci_id_match m; + + m.subvendor_id = PCI_MATCH_ANY; + m.subdevice_id = PCI_MATCH_ANY; + m.device_class = 0; + m.device_class_mask = 0; + m.match_data = 0; hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter); hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter); @@ -109,10 +158,11 @@ hwloc_look_pci(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE) return 0; - if (dstatus->flags & HWLOC_DISC_STATUS_FLAG_PCI_DONE) { - hwloc_debug("%s", "PCI discovery has already been performed, skipping PCI backend.\n"); + if (dstatus->phase == HWLOC_DISC_PHASE_ANNOTATE) { + hwloc_pci_get_names(topology); return 0; } + assert(dstatus->phase == HWLOC_DISC_PHASE_PCI); hwloc_debug("%s", "\nScanning PCI buses...\n"); @@ -136,7 +186,6 @@ hwloc_look_pci(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) pcidev; pcidev = pci_device_next(iter)) { - const char *vendorname, *devicename; unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE]; hwloc_obj_type_t type; struct hwloc_obj *obj; @@ -313,22 +362,11 @@ hwloc_look_pci(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) */ 
} - /* get the vendor name */ - vendorname = pci_device_get_vendor_name(pcidev); - if (vendorname && *vendorname) - hwloc_obj_add_info(obj, "PCIVendor", vendorname); - - /* get the device name */ - devicename = pci_device_get_device_name(pcidev); - if (devicename && *devicename) - hwloc_obj_add_info(obj, "PCIDevice", devicename); - - hwloc_debug(" %04x:%02x:%02x.%01x %04x %04x:%04x %s %s\n", + hwloc_debug(" %04x:%02x:%02x.%01x %04x %04x:%04x\n", domain, bus, dev, func, - device_class, pcidev->vendor_id, pcidev->device_id, - vendorname && *vendorname ? vendorname : "??", - devicename && *devicename ? devicename : "??"); + device_class, pcidev->vendor_id, pcidev->device_id); + hwloc_pci_get_obj_names(obj, &m); hwloc_pcidisc_tree_insert_by_busid(&tree, obj); } @@ -338,35 +376,41 @@ hwloc_look_pci(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) HWLOC_PCIACCESS_UNLOCK(); hwloc_pcidisc_tree_attach(topology, tree); - dstatus->flags |= HWLOC_DISC_STATUS_FLAG_PCI_DONE; + + /* no need to run another PCI phase */ + dstatus->excluded_phases |= HWLOC_DISC_PHASE_PCI; + /* no need to run the annotate phase, we did it above: clear (not keep) the ANNOTATE bit */ + backend->phases &= ~HWLOC_DISC_PHASE_ANNOTATE; return 0; } static struct hwloc_backend * hwloc_pci_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) { struct hwloc_backend *backend; -#ifdef HWLOC_SOLARIS_SYS - if ((uid_t)0 != geteuid()) - return NULL; -#endif - backend = hwloc_backend_alloc(topology, component); if (!backend) return NULL; backend->discover = hwloc_look_pci; + +#ifdef HWLOC_SOLARIS_SYS + if ((uid_t)0 != geteuid()) + backend->phases &= ~HWLOC_DISC_PHASE_PCI; +#endif + return backend; } static struct hwloc_disc_component hwloc_pci_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_MISC, "pci", - 
HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_ANNOTATE, + HWLOC_DISC_PHASE_GLOBAL, hwloc_pci_component_instantiate, 20, 1, diff --git a/hwloc/topology-solaris.c b/hwloc/topology-solaris.c index fe0062814d..2aab96794d 100644 --- a/hwloc/topology-solaris.c +++ b/hwloc/topology-solaris.c @@ -485,6 +485,8 @@ hwloc_look_lgrp(struct hwloc_topology *topology, struct hwloc_disc_status *dstat unsigned curlgrp = 0; int nlgrps; lgrp_id_t root; + const char *env = getenv("HWLOC_USE_NUMA_DISTANCES"); + int need_distances = env && atoi(env); if (!(dstatus->flags & HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES)) { lgrp_list_allowed(topology); @@ -505,7 +507,7 @@ hwloc_look_lgrp(struct hwloc_topology *topology, struct hwloc_disc_status *dstat lgrp_build_numanodes(topology, cookie, root, glob_lgrps, &curlgrp); #if HAVE_DECL_LGRP_LATENCY_COOKIE - if (nlgrps > 1) { + if (nlgrps > 1 && need_distances) { uint64_t *distances = calloc(curlgrp*curlgrp, sizeof(uint64_t)); unsigned i, j; if (distances) { @@ -519,7 +521,7 @@ hwloc_look_lgrp(struct hwloc_topology *topology, struct hwloc_disc_status *dstat } distances[i*curlgrp+j] = (uint64_t) latency; } - hwloc_internal_distances_add(topology, curlgrp, glob_lgrps, distances, + hwloc_internal_distances_add(topology, "NUMA:Solaris", curlgrp, glob_lgrps, distances, HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY, HWLOC_DISTANCES_ADD_FLAG_GROUP); glob_lgrps = NULL; /* dont free it below */ @@ -966,7 +968,7 @@ hwloc_look_kstat(struct hwloc_topology *topology) #endif /* LIBKSTAT */ static int -hwloc_look_solaris(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_solaris(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. 
@@ -977,6 +979,8 @@ hwloc_look_solaris(struct hwloc_backend *backend, struct hwloc_disc_status *dsta struct hwloc_topology *topology = backend->topology; int alreadypus = 0; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -1055,6 +1059,7 @@ hwloc_set_solaris_hooks(struct hwloc_binding_hooks *hooks, static struct hwloc_backend * hwloc_solaris_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -1068,9 +1073,9 @@ hwloc_solaris_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_solaris_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "solaris", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_solaris_component_instantiate, 50, 1, diff --git a/hwloc/topology-synthetic.c b/hwloc/topology-synthetic.c index cfc71944f9..686efce1fb 100644 --- a/hwloc/topology-synthetic.c +++ b/hwloc/topology-synthetic.c @@ -539,11 +539,16 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, if (*pos < '0' || *pos > '9') { if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) { - /* FIXME: allow generic "Cache" string? would require to deal with possibly duplicate cache levels */ - if (verbose) - fprintf(stderr, "Synthetic string with unknown object type at '%s'\n", pos); - errno = EINVAL; - goto error; + if (!strncmp(pos, "Tile", 4) || !strncmp(pos, "Module", 6)) { + /* possible future types */ + type = HWLOC_OBJ_GROUP; + } else { + /* FIXME: allow generic "Cache" string? 
would require to deal with possibly duplicate cache levels */ + if (verbose) + fprintf(stderr, "Synthetic string with unknown object type at '%s'\n", pos); + errno = EINVAL; + goto error; + } } if (type == HWLOC_OBJ_MACHINE || type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) { if (verbose) @@ -653,6 +658,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, errno = EINVAL; return -1; } + if (type_count[HWLOC_OBJ_DIE] > 1) { + if (verbose) + fprintf(stderr, "Synthetic string cannot have several die levels\n"); + errno = EINVAL; + return -1; + } if (type_count[HWLOC_OBJ_NUMANODE] > 1) { if (verbose) fprintf(stderr, "Synthetic string cannot have several NUMA node levels\n"); @@ -837,6 +848,7 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr, obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096; break; case HWLOC_OBJ_PACKAGE: + case HWLOC_OBJ_DIE: break; case HWLOC_OBJ_L1CACHE: case HWLOC_OBJ_L2CACHE: @@ -961,7 +973,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology, } static int -hwloc_look_synthetic(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_synthetic(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend enforces !topology->is_thissystem by default. 
@@ -972,6 +984,8 @@ hwloc_look_synthetic(struct hwloc_backend *backend, struct hwloc_disc_status *ds hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); unsigned i; + assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL); + assert(!topology->levels[0][0]->cpuset); hwloc_alloc_root_sets(topology->levels[0][0]); @@ -1015,6 +1029,7 @@ hwloc_synthetic_backend_disable(struct hwloc_backend *backend) static struct hwloc_backend * hwloc_synthetic_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -1064,8 +1079,8 @@ hwloc_synthetic_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_synthetic_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, "synthetic", + HWLOC_DISC_PHASE_GLOBAL, ~0, hwloc_synthetic_component_instantiate, 30, @@ -1280,6 +1295,12 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag /* if exporting to v1 or without extended-types, use all-v1-compatible Socket name */ res = hwloc_snprintf(tmp, tmplen, "Socket%s", aritys); + } else if (obj->type == HWLOC_OBJ_DIE + && (flags & (HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES + |HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1))) { + /* if exporting to v1 or without extended-types, use all-v1-compatible Group name */ + res = hwloc_snprintf(tmp, tmplen, "Group%s", aritys); + } else if (obj->type == HWLOC_OBJ_GROUP /* don't export group depth */ || flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) { res = hwloc_snprintf(tmp, tmplen, "%s%s", hwloc_obj_type_string(obj->type), aritys); diff --git a/hwloc/topology-windows.c b/hwloc/topology-windows.c index 4c8ac59b5e..22521aa31c 100644 --- a/hwloc/topology-windows.c +++ b/hwloc/topology-windows.c @@ -731,7 +731,7 @@ hwloc_win_get_area_memlocation(hwloc_topology_t topology 
__hwloc_attribute_unuse */ static int -hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend uses the underlying OS. @@ -746,6 +746,8 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta int gotnuma = 0; int gotnumamemory = 0; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; @@ -1144,6 +1146,7 @@ static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribu static struct hwloc_backend * hwloc_windows_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -1157,9 +1160,9 @@ hwloc_windows_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_windows_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "windows", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_windows_component_instantiate, 50, 1, diff --git a/hwloc/topology-x86.c b/hwloc/topology-x86.c index da1b4861ae..1060157de7 100644 --- a/hwloc/topology-x86.c +++ b/hwloc/topology-x86.c @@ -836,7 +836,8 @@ hwloc_x86_add_groups(hwloc_topology_t topology, hwloc_bitmap_t remaining_cpuset, unsigned type, const char *subtype, - unsigned kind) + unsigned kind, + int dont_merge) { hwloc_bitmap_t obj_cpuset; hwloc_obj_t obj; @@ -868,6 +869,7 @@ hwloc_x86_add_groups(hwloc_topology_t topology, obj->cpuset = obj_cpuset; obj->subtype = strdup(subtype); obj->attr->group.kind = kind; + obj->attr->group.dont_merge = dont_merge; hwloc_debug_2args_bitmap("os %s %u has cpuset %s\n", subtype, id, obj_cpuset); 
hwloc_insert_object_by_cpuset(topology, obj); @@ -995,22 +997,17 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, UNIT, "Compute Unit", - HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT); + HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0); /* Look for Intel Modules inside packages */ hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, MODULE, "Module", - HWLOC_GROUP_KIND_INTEL_MODULE); + HWLOC_GROUP_KIND_INTEL_MODULE, 0); /* Look for Intel Tiles inside packages */ hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, TILE, "Tile", - HWLOC_GROUP_KIND_INTEL_TILE); - /* Look for Intel Dies inside packages */ - hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); - hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, - DIE, "Die", - HWLOC_GROUP_KIND_INTEL_DIE); + HWLOC_GROUP_KIND_INTEL_TILE, 0); /* Look for unknown objects */ if (infos[one].otherids) { @@ -1044,6 +1041,43 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns } } + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) { + /* Look for Intel Dies inside packages */ + if (fulldiscovery) { + hwloc_bitmap_t die_cpuset; + hwloc_obj_t die; + + hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); + while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) { + unsigned packageid = infos[i].ids[PKG]; + unsigned dieid = infos[i].ids[DIE]; + + if (dieid == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, i); + continue; + } + + die_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].ids[DIE] == (unsigned) -1) { + hwloc_bitmap_clr(remaining_cpuset, j); + continue; + } + + if (infos[j].ids[PKG] == packageid && infos[j].ids[DIE] == dieid) { + 
hwloc_bitmap_set(die_cpuset, j); + hwloc_bitmap_clr(remaining_cpuset, j); + } + } + die = hwloc_alloc_setup_object(topology, HWLOC_OBJ_DIE, dieid); + die->cpuset = die_cpuset; + hwloc_debug_1arg_bitmap("os die %u has cpuset %s\n", + dieid, die_cpuset); + hwloc_insert_object_by_cpuset(topology, die); + } + } + } + if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { /* Look for cores */ if (fulldiscovery) { @@ -1461,7 +1495,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) } static int -hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { struct hwloc_x86_backend_data_s *data = backend->private_data; struct hwloc_topology *topology = backend->topology; @@ -1469,6 +1503,8 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta int alreadypus = 0; int ret; + assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); + if (getenv("HWLOC_X86_TOPOEXT_NUMANODES")) { flags |= HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES; } @@ -1620,6 +1656,7 @@ hwloc_x86_backend_disable(struct hwloc_backend *backend) static struct hwloc_backend * hwloc_x86_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) @@ -1671,9 +1708,9 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology, } static struct hwloc_disc_component hwloc_x86_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_CPU, "x86", - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, + HWLOC_DISC_PHASE_CPU, + HWLOC_DISC_PHASE_GLOBAL, hwloc_x86_component_instantiate, 45, /* between native and no_os */ 1, diff --git a/hwloc/topology-xml-nolibxml.c b/hwloc/topology-xml-nolibxml.c index 9c9e0cf676..d0e9ec1644 100644 --- 
a/hwloc/topology-xml-nolibxml.c +++ b/hwloc/topology-xml-nolibxml.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2018 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -653,7 +653,7 @@ hwloc__nolibxml_export_end_object(hwloc__xml_export_state_t state, const char *n } static void -hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length) +hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length __hwloc_attribute_unused) { hwloc__nolibxml_export_state_data_t ndata = (void *) state->data; int res; @@ -665,7 +665,7 @@ hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char * } ndata->has_content = 1; - res = hwloc_snprintf(ndata->buffer, ndata->remaining, buffer, length); + res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%s", buffer); hwloc__nolibxml_export_update_buffer(ndata, res); } diff --git a/hwloc/topology-xml.c b/hwloc/topology-xml.c index f4401f45ec..17d5055c22 100644 --- a/hwloc/topology-xml.c +++ b/hwloc/topology-xml.c @@ -238,6 +238,15 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology, state->global->msgprefix); } + else if (!strcmp(name, "dont_merge")) { + unsigned long lvalue = strtoul(value, NULL, 10); + if (obj->type == HWLOC_OBJ_GROUP) + obj->attr->group.dont_merge = lvalue; + else if (hwloc__xml_verbose()) + fprintf(stderr, "%s: ignoring dont_merge attribute for non-group object type\n", + state->global->msgprefix); + } + else if (!strcmp(name, "pci_busid")) { switch (obj->type) { case HWLOC_OBJ_PCI_DEVICE: @@ -796,6 +805,21 @@ hwloc__xml_import_object(hwloc_topology_t topology, state->global->msgprefix); goto error_with_object; } + } else if (!strcasecmp(attrvalue, "Tile")) { + /* deal with possible future type */ + 
obj->type = HWLOC_OBJ_GROUP; + obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_TILE; + } else if (!strcasecmp(attrvalue, "Module")) { + /* deal with possible future type */ + obj->type = HWLOC_OBJ_GROUP; + obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE; + } else if (!strcasecmp(attrvalue, "MemCache")) { + /* ignore possible future type */ + obj->type = _HWLOC_OBJ_FUTURE; + ignored = 1; + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: %s object not-supported, will be ignored\n", + state->global->msgprefix, attrvalue); } else { if (hwloc__xml_verbose()) fprintf(stderr, "%s: unrecognized object type string %s\n", @@ -1021,6 +1045,13 @@ hwloc__xml_import_object(hwloc_topology_t topology, /* end of 1.x specific checks */ } + /* 2.0 backward compatibility */ + if (obj->type == HWLOC_OBJ_GROUP) { + if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE + || (obj->subtype && !strcmp(obj->subtype, "Die"))) + obj->type = HWLOC_OBJ_DIE; + } + /* check that cache attributes are coherent with the actual type */ if (hwloc__obj_type_is_cache(obj->type) && obj->type != hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type)) { @@ -1180,19 +1211,24 @@ hwloc__xml_import_object(hwloc_topology_t topology, static int hwloc__xml_v2import_distances(hwloc_topology_t topology, - hwloc__xml_import_state_t state) + hwloc__xml_import_state_t state, + int heterotypes) { - hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE; + hwloc_obj_type_t unique_type = HWLOC_OBJ_TYPE_NONE; + hwloc_obj_type_t *different_types = NULL; unsigned nbobjs = 0; - int indexing = 0; + int indexing = heterotypes; int os_indexing = 0; - int gp_indexing = 0; + int gp_indexing = heterotypes; + char *name = NULL; unsigned long kind = 0; unsigned nr_indexes, nr_u64values; uint64_t *indexes; uint64_t *u64values; int ret; +#define _TAG_NAME (heterotypes ? 
"distances2hetero" : "distances2") + /* process attributes */ while (1) { char *attrname, *attrvalue; @@ -1201,8 +1237,12 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (!strcmp(attrname, "nbobjs")) nbobjs = strtoul(attrvalue, NULL, 10); else if (!strcmp(attrname, "type")) { - if (hwloc_type_sscanf(attrvalue, &type, NULL, 0) < 0) + if (hwloc_type_sscanf(attrvalue, &unique_type, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: unrecognized %s type %s\n", + state->global->msgprefix, _TAG_NAME, attrvalue); goto out; + } } else if (!strcmp(attrname, "indexing")) { indexing = 1; @@ -1214,27 +1254,32 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, else if (!strcmp(attrname, "kind")) { kind = strtoul(attrvalue, NULL, 10); } + else if (!strcmp(attrname, "name")) { + name = attrvalue; + } else { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring unknown distance attribute %s\n", - state->global->msgprefix, attrname); + fprintf(stderr, "%s: ignoring unknown %s attribute %s\n", + state->global->msgprefix, _TAG_NAME, attrname); } } /* abort if missing attribute */ - if (!nbobjs || type == HWLOC_OBJ_TYPE_NONE || !indexing || !kind) { + if (!nbobjs || (!heterotypes && unique_type == HWLOC_OBJ_TYPE_NONE) || !indexing || !kind) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 missing some attributes\n", - state->global->msgprefix); + fprintf(stderr, "%s: %s missing some attributes\n", + state->global->msgprefix, _TAG_NAME); goto out; } indexes = malloc(nbobjs*sizeof(*indexes)); u64values = malloc(nbobjs*nbobjs*sizeof(*u64values)); - if (!indexes || !u64values) { + if (heterotypes) + different_types = malloc(nbobjs*sizeof(*different_types)); + if (!indexes || !u64values || (heterotypes && !different_types)) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: failed to allocate distances arrays for %u objects\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: failed to allocate %s arrays for %u objects\n", 
+ state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } @@ -1258,16 +1303,16 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, is_u64values = 1; if (!is_index && !is_u64values) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with unrecognized child %s\n", - state->global->msgprefix, tag); + fprintf(stderr, "%s: %s with unrecognized child %s\n", + state->global->msgprefix, _TAG_NAME, tag); goto out_with_arrays; } if (state->global->next_attr(&childstate, &attrname, &attrvalue) < 0 || strcmp(attrname, "length")) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 child must have length attribute\n", - state->global->msgprefix); + fprintf(stderr, "%s: %s child must have length attribute\n", + state->global->msgprefix, _TAG_NAME); goto out_with_arrays; } length = atoi(attrvalue); @@ -1275,24 +1320,43 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, ret = state->global->get_content(&childstate, &buffer, length); if (ret < 0) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 child needs content of length %d\n", - state->global->msgprefix, length); + fprintf(stderr, "%s: %s child needs content of length %d\n", + state->global->msgprefix, _TAG_NAME, length); goto out_with_arrays; } if (is_index) { /* get indexes */ - char *tmp; + char *tmp, *tmp2; if (nr_indexes >= nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with more than %u indexes\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } tmp = buffer; while (1) { char *next; - unsigned long long u = strtoull(tmp, &next, 0); + unsigned long long u; + if (heterotypes) { + hwloc_obj_type_t t = HWLOC_OBJ_TYPE_NONE; + if (hwloc_type_sscanf(tmp, &t, NULL, 0) < 0) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: %s with unrecognized heterogeneous type %s\n", + state->global->msgprefix, _TAG_NAME, tmp); + goto 
out_with_arrays; + } + tmp2 = strchr(tmp, ':'); + if (!tmp2) { + if (hwloc__xml_verbose()) + fprintf(stderr, "%s: %s with missing colon after heterogeneous type %s\n", + state->global->msgprefix, _TAG_NAME, tmp); + goto out_with_arrays; + } + tmp = tmp2+1; + different_types[nr_indexes] = t; + } + u = strtoull(tmp, &next, 0); if (next == tmp) break; indexes[nr_indexes++] = u; @@ -1308,8 +1372,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, char *tmp; if (nr_u64values >= nbobjs*nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u u64values\n", - state->global->msgprefix, nbobjs*nbobjs); + fprintf(stderr, "%s: %s with more than %u u64values\n", + state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs); goto out_with_arrays; } tmp = buffer; @@ -1332,8 +1396,8 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, ret = state->global->close_tag(&childstate); if (ret < 0) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with more than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with more than %u indexes\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } @@ -1342,56 +1406,60 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology, if (nr_indexes != nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with less than %u indexes\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: %s with less than %u indexes\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_with_arrays; } if (nr_u64values != nbobjs*nbobjs) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: distance2 with less than %u u64values\n", - state->global->msgprefix, nbobjs*nbobjs); + fprintf(stderr, "%s: %s with less than %u u64values\n", + state->global->msgprefix, _TAG_NAME, nbobjs*nbobjs); goto out_with_arrays; } if (nbobjs < 2) { /* distances with a single object are useless, even if the XML isn't invalid */ if (hwloc__xml_verbose()) - fprintf(stderr, 
"%s: ignoring distances2 with only %u objects\n", - state->global->msgprefix, nbobjs); + fprintf(stderr, "%s: ignoring %s with only %u objects\n", + state->global->msgprefix, _TAG_NAME, nbobjs); goto out_ignore; } - if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE) { + if (unique_type == HWLOC_OBJ_PU || unique_type == HWLOC_OBJ_NUMANODE) { if (!os_indexing) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring PU or NUMA distances2 without os_indexing\n", - state->global->msgprefix); + fprintf(stderr, "%s: ignoring PU or NUMA %s without os_indexing\n", + state->global->msgprefix, _TAG_NAME); goto out_ignore; } } else { if (!gp_indexing) { if (hwloc__xml_verbose()) - fprintf(stderr, "%s: ignoring !PU or !NUMA distances2 without gp_indexing\n", - state->global->msgprefix); + fprintf(stderr, "%s: ignoring !PU or !NUMA %s without gp_indexing\n", + state->global->msgprefix, _TAG_NAME); goto out_ignore; } } - hwloc_internal_distances_add_by_index(topology, type, nbobjs, indexes, u64values, kind, 0); + hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0); /* prevent freeing below */ indexes = NULL; u64values = NULL; + different_types = NULL; out_ignore: + free(different_types); free(indexes); free(u64values); return state->global->close_tag(state); out_with_arrays: + free(different_types); free(indexes); free(u64values); out: return -1; +#undef _TAG_NAME } static int @@ -1593,7 +1661,7 @@ hwloc_convert_from_v1dist_floats(hwloc_topology_t topology, unsigned nbobjs, flo /* this canNOT be the first XML call */ static int -hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus __hwloc_attribute_unused) +hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus) { /* * This backend enforces !topology->is_thissystem by default. 
@@ -1608,6 +1676,8 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus hwloc_localeswitch_declare; int ret; + assert(dstatus->phase == HWLOC_DISC_PHASE_GLOBAL); + state.global = data; assert(!root->cpuset); @@ -1652,15 +1722,20 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus goto failed; if (!ret) break; - if (strcmp(tag, "distances2")) { + if (!strcmp(tag, "distances2")) { + ret = hwloc__xml_v2import_distances(topology, &childstate, 0); + if (ret < 0) + goto failed; + } else if (!strcmp(tag, "distances2hetero")) { + ret = hwloc__xml_v2import_distances(topology, &childstate, 1); + if (ret < 0) + goto failed; + } else { if (hwloc__xml_verbose()) fprintf(stderr, "%s: ignoring unknown tag `%s' after root object, expected `distances2'\n", data->msgprefix, tag); goto done; } - ret = hwloc__xml_v2import_distances(topology, &childstate); - if (ret < 0) - goto failed; state.global->close_child(&childstate); } } @@ -1710,8 +1785,8 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus inext_cousin) objs[i] = node; -hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); - hwloc_internal_distances_add(topology, nbobjs, objs, values, v1dist->kind, 0); + hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values); + hwloc_internal_distances_add(topology, NULL, nbobjs, objs, values, v1dist->kind, 0); } else { free(objs); free(values); @@ -1906,6 +1981,9 @@ hwloc__xml_export_safestrdup(const char *old) char *new = malloc(strlen(old)+1); char *dst = new; const char *src = old; + if (!new) + return NULL; + while (*src) { if (HWLOC_XML_CHAR_VALID(*src)) *(dst++) = *src; @@ -1925,6 +2003,8 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo if (v1export && obj->type == HWLOC_OBJ_PACKAGE) state->new_prop(state, "type", "Socket"); + else if (v1export && obj->type == HWLOC_OBJ_DIE) + state->new_prop(state, "type", "Group"); 
else if (v1export && hwloc__obj_type_is_cache(obj->type)) state->new_prop(state, "type", "Cache"); else @@ -1971,13 +2051,17 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo state->new_prop(state, "online_cpuset", setstring); free(setstring); - if (v1export || !obj->parent) { + if (v1export) { hwloc_bitmap_t allowed_cpuset = hwloc_bitmap_dup(obj->cpuset); hwloc_bitmap_and(allowed_cpuset, allowed_cpuset, topology->allowed_cpuset); hwloc_bitmap_asprintf(&setstring, allowed_cpuset); state->new_prop(state, "allowed_cpuset", setstring); free(setstring); hwloc_bitmap_free(allowed_cpuset); + } else if (!obj->parent) { + hwloc_bitmap_asprintf(&setstring, topology->allowed_cpuset); + state->new_prop(state, "allowed_cpuset", setstring); + free(setstring); } } @@ -1992,13 +2076,17 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo state->new_prop(state, "complete_nodeset", setstring); free(setstring); - if (v1export || !obj->parent) { + if (v1export) { hwloc_bitmap_t allowed_nodeset = hwloc_bitmap_dup(obj->nodeset); hwloc_bitmap_and(allowed_nodeset, allowed_nodeset, topology->allowed_nodeset); hwloc_bitmap_asprintf(&setstring, allowed_nodeset); state->new_prop(state, "allowed_nodeset", setstring); free(setstring); hwloc_bitmap_free(allowed_nodeset); + } else if (!obj->parent) { + hwloc_bitmap_asprintf(&setstring, topology->allowed_nodeset); + state->new_prop(state, "allowed_nodeset", setstring); + free(setstring); } } @@ -2009,13 +2097,17 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo if (obj->name) { char *name = hwloc__xml_export_safestrdup(obj->name); - state->new_prop(state, "name", name); - free(name); + if (name) { + state->new_prop(state, "name", name); + free(name); + } } if (!v1export && obj->subtype) { char *subtype = hwloc__xml_export_safestrdup(obj->subtype); - state->new_prop(state, "subtype", subtype); - free(subtype); + if (subtype) { + 
state->new_prop(state, "subtype", subtype); + free(subtype); + } } switch (obj->type) { @@ -2058,11 +2150,15 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo if (v1export) { sprintf(tmp, "%u", obj->attr->group.depth); state->new_prop(state, "depth", tmp); + if (obj->attr->group.dont_merge) + state->new_prop(state, "dont_merge", "1"); } else { sprintf(tmp, "%u", obj->attr->group.kind); state->new_prop(state, "kind", tmp); sprintf(tmp, "%u", obj->attr->group.subkind); state->new_prop(state, "subkind", tmp); + if (obj->attr->group.dont_merge) + state->new_prop(state, "dont_merge", "1"); } break; case HWLOC_OBJ_BRIDGE: @@ -2107,23 +2203,34 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo for(i=0; iinfos_count; i++) { char *name = hwloc__xml_export_safestrdup(obj->infos[i].name); char *value = hwloc__xml_export_safestrdup(obj->infos[i].value); - struct hwloc__xml_export_state_s childstate; - state->new_child(state, &childstate, "info"); - childstate.new_prop(&childstate, "name", name); - childstate.new_prop(&childstate, "value", value); - childstate.end_object(&childstate, "info"); + if (name && value) { + struct hwloc__xml_export_state_s childstate; + state->new_child(state, &childstate, "info"); + childstate.new_prop(&childstate, "name", name); + childstate.new_prop(&childstate, "value", value); + childstate.end_object(&childstate, "info"); + } free(name); free(value); } if (v1export && obj->subtype) { char *subtype = hwloc__xml_export_safestrdup(obj->subtype); + if (subtype) { + struct hwloc__xml_export_state_s childstate; + int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); + state->new_child(state, &childstate, "info"); + childstate.new_prop(&childstate, "name", is_coproctype ? 
"CoProcType" : "Type"); + childstate.new_prop(&childstate, "value", subtype); + childstate.end_object(&childstate, "info"); + free(subtype); + } + } + if (v1export && obj->type == HWLOC_OBJ_DIE) { struct hwloc__xml_export_state_s childstate; - int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); state->new_child(state, &childstate, "info"); - childstate.new_prop(&childstate, "name", is_coproctype ? "CoProcType" : "Type"); - childstate.new_prop(&childstate, "value", subtype); + childstate.new_prop(&childstate, "name", "Type"); + childstate.new_prop(&childstate, "value", "Die"); childstate.end_object(&childstate, "info"); - free(subtype); } if (v1export && !obj->parent) { @@ -2137,10 +2244,12 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo unsigned *logical_to_v2array; int depth; - if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->type)) + if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->unique_type)) continue; if (!(dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY)) continue; + if (dist->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) + continue; logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array)); if (!logical_to_v2array) { @@ -2152,7 +2261,7 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo logical_to_v2array[dist->objs[i]->logical_index] = i; /* compute the relative depth */ - if (dist->type == HWLOC_OBJ_NUMANODE) { + if (dist->unique_type == HWLOC_OBJ_NUMANODE) { /* for NUMA nodes, use the highest normal-parent depth + 1 */ depth = -1; for(i=0; itype) + parent_with_memory; + depth = hwloc_get_type_depth(topology, dist->unique_type) + parent_with_memory; } state->new_child(state, &childstate, "distances"); @@ -2266,7 +2375,7 @@ static unsigned hwloc__xml_v1export_object_list_numanodes(hwloc_obj_t obj, hwloc_obj_t *first_p, hwloc_obj_t **nodes_p) { hwloc_obj_t *nodes, cur; - unsigned nr; + int 
nr; if (!obj->memory_first_child) { *first_p = NULL; @@ -2410,30 +2519,74 @@ hwloc__xml_v1export_object (hwloc__xml_export_state_t parentstate, hwloc_topolog } \ } while (0) +#define EXPORT_TYPE_GPINDEX_ARRAY(state, nr, objs, tagname, maxperline) do { \ + unsigned _i = 0; \ + while (_i<(nr)) { \ + char _tmp[255]; /* enough for (snprintf(type+index)+space) x maxperline */ \ + char _tmp2[16]; \ + size_t _len = 0; \ + unsigned _j; \ + struct hwloc__xml_export_state_s _childstate; \ + (state)->new_child(state, &_childstate, tagname); \ + for(_j=0; \ + _i+_j<(nr) && _jtype), (unsigned long long) (objs)[_i+_j]->gp_index); \ + _i += _j; \ + sprintf(_tmp2, "%lu", (unsigned long) _len); \ + _childstate.new_prop(&_childstate, "length", _tmp2); \ + _childstate.add_content(&_childstate, _tmp, _len); \ + _childstate.end_object(&_childstate, tagname); \ + } \ +} while (0) + static void -hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology) +hwloc___xml_v2export_distances(hwloc__xml_export_state_t parentstate, struct hwloc_internal_distances_s *dist) { - struct hwloc_internal_distances_s *dist; - for(dist = topology->first_dist; dist; dist = dist->next) { - char tmp[255]; - unsigned nbobjs = dist->nbobjs; - struct hwloc__xml_export_state_s state; + char tmp[255]; + unsigned nbobjs = dist->nbobjs; + struct hwloc__xml_export_state_s state; + if (dist->different_types) { + parentstate->new_child(parentstate, &state, "distances2hetero"); + } else { parentstate->new_child(parentstate, &state, "distances2"); + state.new_prop(&state, "type", hwloc_obj_type_string(dist->unique_type)); + } - state.new_prop(&state, "type", hwloc_obj_type_string(dist->type)); - sprintf(tmp, "%u", nbobjs); - state.new_prop(&state, "nbobjs", tmp); - sprintf(tmp, "%lu", dist->kind); - state.new_prop(&state, "kind", tmp); + sprintf(tmp, "%u", nbobjs); + state.new_prop(&state, "nbobjs", tmp); + sprintf(tmp, "%lu", dist->kind); + state.new_prop(&state, "kind", tmp); + if 
(dist->name) + state.new_prop(&state, "name", dist->name); + if (!dist->different_types) { state.new_prop(&state, "indexing", - HWLOC_DIST_TYPE_USE_OS_INDEX(dist->type) ? "os" : "gp"); - /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */ + HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type) ? "os" : "gp"); + } + + /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */ + if (dist->different_types) { + EXPORT_TYPE_GPINDEX_ARRAY(&state, nbobjs, dist->objs, "indexes", 10); + } else { EXPORT_ARRAY(&state, unsigned long long, nbobjs, dist->indexes, "indexes", "%llu", 10); - EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10); - state.end_object(&state, "distances2"); } + EXPORT_ARRAY(&state, unsigned long long, nbobjs*nbobjs, dist->values, "u64values", "%llu", 10); + state.end_object(&state, dist->different_types ? "distances2hetero" : "distances2"); +} + +static void +hwloc__xml_v2export_distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology) +{ + struct hwloc_internal_distances_s *dist; + for(dist = topology->first_dist; dist; dist = dist->next) + if (!dist->different_types) + hwloc___xml_v2export_distances(parentstate, dist); + /* export homogeneous distances first in case the importer doesn't support heterogeneous and stops there */ + for(dist = topology->first_dist; dist; dist = dist->next) + if (dist->different_types) + hwloc___xml_v2export_distances(parentstate, dist); } void @@ -2859,6 +3012,7 @@ hwloc_xml_backend_disable(struct hwloc_backend *backend) static struct hwloc_backend * hwloc_xml_component_instantiate(struct hwloc_topology *topology, struct hwloc_disc_component *component, + unsigned excluded_phases __hwloc_attribute_unused, const void *_data1, const void *_data2, const void *_data3) @@ -2938,8 +3092,8 @@ hwloc_xml_component_instantiate(struct hwloc_topology *topology, 
} static struct hwloc_disc_component hwloc_xml_disc_component = { - HWLOC_DISC_COMPONENT_TYPE_GLOBAL, "xml", + HWLOC_DISC_PHASE_GLOBAL, ~0, hwloc_xml_component_instantiate, 30, diff --git a/hwloc/topology.c b/hwloc/topology.c index 87029ef413..7a00e3aae8 100644 --- a/hwloc/topology.c +++ b/hwloc/topology.c @@ -1004,23 +1004,24 @@ hwloc_topology_dup(hwloc_topology_t *newp, static const unsigned obj_type_order[] = { /* first entry is HWLOC_OBJ_MACHINE */ 0, /* next entry is HWLOC_OBJ_PACKAGE */ 4, - /* next entry is HWLOC_OBJ_CORE */ 13, - /* next entry is HWLOC_OBJ_PU */ 17, - /* next entry is HWLOC_OBJ_L1CACHE */ 11, - /* next entry is HWLOC_OBJ_L2CACHE */ 9, - /* next entry is HWLOC_OBJ_L3CACHE */ 7, - /* next entry is HWLOC_OBJ_L4CACHE */ 6, - /* next entry is HWLOC_OBJ_L5CACHE */ 5, - /* next entry is HWLOC_OBJ_L1ICACHE */ 12, - /* next entry is HWLOC_OBJ_L2ICACHE */ 10, - /* next entry is HWLOC_OBJ_L3ICACHE */ 8, + /* next entry is HWLOC_OBJ_CORE */ 14, + /* next entry is HWLOC_OBJ_PU */ 18, + /* next entry is HWLOC_OBJ_L1CACHE */ 12, + /* next entry is HWLOC_OBJ_L2CACHE */ 10, + /* next entry is HWLOC_OBJ_L3CACHE */ 8, + /* next entry is HWLOC_OBJ_L4CACHE */ 7, + /* next entry is HWLOC_OBJ_L5CACHE */ 6, + /* next entry is HWLOC_OBJ_L1ICACHE */ 13, + /* next entry is HWLOC_OBJ_L2ICACHE */ 11, + /* next entry is HWLOC_OBJ_L3ICACHE */ 9, /* next entry is HWLOC_OBJ_GROUP */ 1, /* next entry is HWLOC_OBJ_NUMANODE */ 3, - /* next entry is HWLOC_OBJ_BRIDGE */ 14, - /* next entry is HWLOC_OBJ_PCI_DEVICE */ 15, - /* next entry is HWLOC_OBJ_OS_DEVICE */ 16, - /* next entry is HWLOC_OBJ_MISC */ 18, - /* next entry is HWLOC_OBJ_MEMCACHE */ 2 + /* next entry is HWLOC_OBJ_BRIDGE */ 15, + /* next entry is HWLOC_OBJ_PCI_DEVICE */ 16, + /* next entry is HWLOC_OBJ_OS_DEVICE */ 17, + /* next entry is HWLOC_OBJ_MISC */ 19, + /* next entry is HWLOC_OBJ_MEMCACHE */ 2, + /* next entry is HWLOC_OBJ_DIE */ 5 }; #ifndef NDEBUG /* only used in debug check assert if !NDEBUG */ @@ -1030,6 
+1031,7 @@ static const hwloc_obj_type_t obj_order_type[] = { HWLOC_OBJ_MEMCACHE, HWLOC_OBJ_NUMANODE, HWLOC_OBJ_PACKAGE, + HWLOC_OBJ_DIE, HWLOC_OBJ_L5CACHE, HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L3CACHE, @@ -1054,6 +1056,7 @@ static const hwloc_obj_type_t obj_order_type[] = { * Always keep Machine/NUMANode/PU/PCIDev/OSDev * then Core * then Package + * then Die * then Cache, * then Instruction Caches * then always drop Group/Misc/Bridge. @@ -1080,7 +1083,8 @@ static const int obj_type_priority[] = { /* next entry is HWLOC_OBJ_PCI_DEVICE */ 100, /* next entry is HWLOC_OBJ_OS_DEVICE */ 100, /* next entry is HWLOC_OBJ_MISC */ 0, - /* next entry is HWLOC_OBJ_MEMCACHE */ 19 + /* next entry is HWLOC_OBJ_MEMCACHE */ 19, + /* next entry is HWLOC_OBJ_DIE */ 30 }; int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) @@ -1268,8 +1272,30 @@ merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old) static __hwloc_inline hwloc_obj_t hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) { - if (new->type == HWLOC_OBJ_GROUP) { - /* Groups are ignored keep_structure or always. Non-ignored Groups isn't possible (asserted in topology_check()). */ + if (new->type == HWLOC_OBJ_GROUP && old->type == HWLOC_OBJ_GROUP) { + /* which group do we keep? 
*/ + if (new->attr->group.dont_merge) { + if (old->attr->group.dont_merge) + /* nobody wants to be merged */ + return NULL; + + /* keep the new one, it doesn't want to be merged */ + hwloc_replace_linked_object(old, new); + return new; + + } else { + if (old->attr->group.dont_merge) + /* keep the old one, it doesn't want to be merged */ + return old; + + /* compare subkinds to decide who to keep */ + if (new->attr->group.kind < old->attr->group.kind) + hwloc_replace_linked_object(old, new); + return old; + } + } + + if (new->type == HWLOC_OBJ_GROUP && !new->attr->group.dont_merge) { if (old->type == HWLOC_OBJ_PU && new->attr->group.kind == HWLOC_GROUP_KIND_MEMORY) /* Never merge Memory groups with PU, we don't want to attach Memory under PU */ @@ -1278,18 +1304,9 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) /* Remove the Group now. The normal ignore code path wouldn't tell us whether the Group was removed or not, * while some callers need to know (at least hwloc_topology_insert_group()). */ - - /* If merging two groups, keep the smallest kind. - * Replace the existing Group with the new Group contents - * and let the caller free the new Group. 
- */ - if (old->type == HWLOC_OBJ_GROUP - && (new->attr->group.kind < old->attr->group.kind)) - hwloc_replace_linked_object(old, new); - return old; - } else if (old->type == HWLOC_OBJ_GROUP) { + } else if (old->type == HWLOC_OBJ_GROUP && !old->attr->group.dont_merge) { if (new->type == HWLOC_OBJ_PU && old->attr->group.kind == HWLOC_GROUP_KIND_MEMORY) /* Never merge Memory groups with PU, we don't want to attach Memory under PU */ @@ -1300,9 +1317,11 @@ hwloc__insert_try_merge_group(hwloc_obj_t old, hwloc_obj_t new) */ hwloc_replace_linked_object(old, new); return old; - } - return NULL; + } else { + /* cannot merge */ + return NULL; + } } /* @@ -1748,11 +1767,18 @@ hwloc_alloc_setup_object(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned os_index) { struct hwloc_obj *obj = hwloc_tma_malloc(topology->tma, sizeof(*obj)); + if (!obj) + return NULL; memset(obj, 0, sizeof(*obj)); obj->type = type; obj->os_index = os_index; obj->gp_index = topology->next_gp_index++; obj->attr = hwloc_tma_malloc(topology->tma, sizeof(*obj->attr)); + if (!obj->attr) { + assert(!topology->tma || !topology->tma->dontfree); /* this tma cannot fail to allocate */ + free(obj); + return NULL; + } memset(obj->attr, 0, sizeof(*obj->attr)); /* do not allocate the cpuset here, let the caller do it */ return obj; @@ -2289,6 +2315,20 @@ hwloc_reset_normal_type_depths(hwloc_topology_t topology) for (i=HWLOC_OBJ_TYPE_MIN; i<=HWLOC_OBJ_GROUP; i++) topology->type_depth[i] = HWLOC_TYPE_DEPTH_UNKNOWN; /* type contiguity is asserted in topology_check() */ + topology->type_depth[HWLOC_OBJ_DIE] = HWLOC_TYPE_DEPTH_UNKNOWN; +} + +static int +hwloc_dont_merge_group_level(hwloc_topology_t topology, unsigned i) +{ + unsigned j; + + /* Don't merge some groups in that level? 
*/ + for(j=0; jlevel_nbobjects[i]; j++) + if (topology->levels[i][j]->attr->group.dont_merge) + return 1; + + return 0; } /* compare i-th and i-1-th levels structure */ @@ -2302,6 +2342,8 @@ hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i) return -1; for(j=0; jlevel_nbobjects[i]; j++) { + if (topology->levels[i-1][j] != topology->levels[i][j]->parent) + return -1; if (topology->levels[i-1][j]->arity != 1) return -1; if (checkmemory && topology->levels[i-1][j]->memory_arity) @@ -2328,12 +2370,18 @@ hwloc_filter_levels_keep_structure(hwloc_topology_t topology) hwloc_obj_type_t type2 = obj2->type; /* Check whether parents and/or children can be replaced */ - if (topology->type_filter[type1] == HWLOC_TYPE_FILTER_KEEP_STRUCTURE) + if (topology->type_filter[type1] == HWLOC_TYPE_FILTER_KEEP_STRUCTURE) { /* Parents can be ignored in favor of children. */ replaceparent = 1; - if (topology->type_filter[type2] == HWLOC_TYPE_FILTER_KEEP_STRUCTURE) + if (type1 == HWLOC_OBJ_GROUP && hwloc_dont_merge_group_level(topology, i-1)) + replaceparent = 0; + } + if (topology->type_filter[type2] == HWLOC_TYPE_FILTER_KEEP_STRUCTURE) { /* Children can be ignored in favor of parents. 
*/ replacechild = 1; + if (type1 == HWLOC_OBJ_GROUP && hwloc_dont_merge_group_level(topology, i)) + replacechild = 0; + } if (!replacechild && !replaceparent) /* no ignoring */ continue; @@ -2690,6 +2738,9 @@ hwloc_build_level_from_list(struct hwloc_special_level_s *slevel) if (nb) { /* allocate and fill level */ slevel->objs = malloc(nb * sizeof(struct hwloc_obj *)); + if (!slevel->objs) + return -1; + obj = slevel->first; i = 0; while (obj) { @@ -2793,7 +2844,7 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj) } /* Build I/O levels */ -static void +static int hwloc_connect_io_misc_levels(hwloc_topology_t topology) { unsigned i; @@ -2804,8 +2855,12 @@ hwloc_connect_io_misc_levels(hwloc_topology_t topology) hwloc_list_special_objects(topology, topology->levels[0][0]); - for(i=0; islevels[i]); + for(i=0; islevels[i]) < 0) + return -1; + } + + return 0; } /* @@ -2882,7 +2937,11 @@ hwloc_connect_levels(hwloc_topology_t topology) /* allocate enough to take all current objects and an ending NULL */ taken_objs = malloc((n_objs+1) * sizeof(taken_objs[0])); - assert(taken_objs); + if (!taken_objs) { + free(objs); + errno = ENOMEM; + return -1; + } /* allocate enough to keep all current objects or their children */ n_new_objs = 0; @@ -2893,7 +2952,12 @@ hwloc_connect_levels(hwloc_topology_t topology) n_new_objs++; } new_objs = malloc(n_new_objs * sizeof(new_objs[0])); - assert(new_objs); + if (!new_objs) { + free(objs); + free(taken_objs); + errno = ENOMEM; + return -1; + } /* now actually take these objects */ n_new_objs = 0; @@ -3002,7 +3066,8 @@ hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags) if (hwloc_connect_levels(topology) < 0) return -1; - hwloc_connect_io_misc_levels(topology); + if (hwloc_connect_io_misc_levels(topology) < 0) + return -1; topology->modified = 0; @@ -3085,21 +3150,33 @@ void hwloc_alloc_root_sets(hwloc_obj_t root) root->complete_nodeset = hwloc_bitmap_alloc(); } +static void 
+hwloc_discover_by_phase(struct hwloc_topology *topology, + struct hwloc_disc_status *dstatus, + const char *phasename __hwloc_attribute_unused) +{ + struct hwloc_backend *backend; + hwloc_debug("%s phase discovery...\n", phasename); + for(backend = topology->backends; backend; backend = backend->next) { + if (dstatus->phase & dstatus->excluded_phases) + break; + if (!(backend->phases & dstatus->phase)) + continue; + if (!backend->discover) + continue; + hwloc_debug("%s phase discovery in component %s...\n", phasename, backend->component->name); + backend->discover(backend, dstatus); + hwloc_debug_print_objects(0, topology->levels[0][0]); + } +} + /* Main discovery loop */ static int -hwloc_discover(struct hwloc_topology *topology) +hwloc_discover(struct hwloc_topology *topology, + struct hwloc_disc_status *dstatus) { - struct hwloc_backend *backend; - struct hwloc_disc_status dstatus; const char *env; - dstatus.flags = 0; /* did nothing yet */ - - env = getenv("HWLOC_ALLOW"); - if (env && !strcmp(env, "all")) - /* don't retrieve the sets of allowed resources */ - dstatus.flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES; - topology->modified = 0; /* no need to reconnect yet */ topology->allowed_cpuset = hwloc_bitmap_alloc_full(); @@ -3140,44 +3217,70 @@ hwloc_discover(struct hwloc_topology *topology) * automatically propagated to the whole tree after detection. */ - /* - * Discover CPUs first - */ - backend = topology->backends; - while (NULL != backend) { - if (backend->component->type != HWLOC_DISC_COMPONENT_TYPE_CPU - && backend->component->type != HWLOC_DISC_COMPONENT_TYPE_GLOBAL) - /* not yet */ - goto next_cpubackend; - if (!backend->discover) - goto next_cpubackend; - backend->discover(backend, &dstatus); + if (topology->backend_phases & HWLOC_DISC_PHASE_GLOBAL) { + /* usually, GLOBAL is alone. + * but HWLOC_ANNOTATE_GLOBAL_COMPONENTS=1 allows optional ANNOTATE steps. 
+ */ + struct hwloc_backend *global_backend = topology->backends; + assert(global_backend); + assert(global_backend->phases == HWLOC_DISC_PHASE_GLOBAL); + + /* + * Perform the single-component-based GLOBAL discovery + */ + hwloc_debug("GLOBAL phase discovery...\n"); + hwloc_debug("GLOBAL phase discovery with component %s...\n", global_backend->component->name); + dstatus->phase = HWLOC_DISC_PHASE_GLOBAL; + global_backend->discover(global_backend, dstatus); hwloc_debug_print_objects(0, topology->levels[0][0]); + } + /* Don't explicitly ignore other phases, in case there's ever + * a need to bring them back. + * The component will usually exclude them by default anyway. + * Except if annotating global components is explicitly requested. + */ + + if (topology->backend_phases & HWLOC_DISC_PHASE_CPU) { + /* + * Discover CPUs first + */ + dstatus->phase = HWLOC_DISC_PHASE_CPU; + hwloc_discover_by_phase(topology, dstatus, "CPU"); + } -next_cpubackend: - backend = backend->next; + if (!(topology->backend_phases & (HWLOC_DISC_PHASE_GLOBAL|HWLOC_DISC_PHASE_CPU))) { + hwloc_debug("No GLOBAL or CPU component phase found\n"); + /* we'll fail below */ } /* One backend should have called hwloc_alloc_root_sets() * and set bits during PU and NUMA insert. 
*/ if (!topology->levels[0][0]->cpuset || hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) { - hwloc_debug("%s", "No PU added by any CPU and global backend\n"); + hwloc_debug("%s", "No PU added by any CPU or GLOBAL component phase\n"); errno = EINVAL; return -1; } + /* + * Memory-specific discovery + */ + if (topology->backend_phases & HWLOC_DISC_PHASE_MEMORY) { + dstatus->phase = HWLOC_DISC_PHASE_MEMORY; + hwloc_discover_by_phase(topology, dstatus, "MEMORY"); + } + if (/* check if getting the sets of locally allowed resources is possible */ topology->binding_hooks.get_allowed_resources && topology->is_thissystem /* check whether it has been done already */ - && !(dstatus.flags & HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES) + && !(dstatus->flags & HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES) /* check whether it was explicitly requested */ && ((topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) != 0 || ((env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES")) != NULL && atoi(env)))) { /* OK, get the sets of locally allowed resources */ topology->binding_hooks.get_allowed_resources(topology); - dstatus.flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES; + dstatus->flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES; } /* If there's no NUMA node, add one with all the memory. 
@@ -3257,30 +3360,28 @@ hwloc_discover(struct hwloc_topology *topology) hwloc_debug_print_objects(0, topology->levels[0][0]); /* - * Additional discovery with other backends + * Additional discovery */ - - backend = topology->backends; - while (NULL != backend) { - if (backend->component->type == HWLOC_DISC_COMPONENT_TYPE_CPU - || backend->component->type == HWLOC_DISC_COMPONENT_TYPE_GLOBAL) - /* already done above */ - goto next_noncpubackend; - if (!backend->discover) - goto next_noncpubackend; - backend->discover(backend, &dstatus); - hwloc_debug_print_objects(0, topology->levels[0][0]); - -next_noncpubackend: - backend = backend->next; + if (topology->backend_phases & HWLOC_DISC_PHASE_PCI) { + dstatus->phase = HWLOC_DISC_PHASE_PCI; + hwloc_discover_by_phase(topology, dstatus, "PCI"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_IO) { + dstatus->phase = HWLOC_DISC_PHASE_IO; + hwloc_discover_by_phase(topology, dstatus, "IO"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_MISC) { + dstatus->phase = HWLOC_DISC_PHASE_MISC; + hwloc_discover_by_phase(topology, dstatus, "MISC"); + } + if (topology->backend_phases & HWLOC_DISC_PHASE_ANNOTATE) { + dstatus->phase = HWLOC_DISC_PHASE_ANNOTATE; + hwloc_discover_by_phase(topology, dstatus, "ANNOTATE"); } if (getenv("HWLOC_DEBUG_SORT_CHILDREN")) hwloc_debug_sort_children(topology->levels[0][0]); - hwloc_debug("%s", "\nNow reconnecting\n"); - hwloc_debug_print_objects(0, topology->levels[0][0]); - /* Remove some stuff */ hwloc_debug("%s", "\nRemoving bridge objects if needed\n"); @@ -3432,7 +3533,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp, topology->support.cpubind = hwloc_tma_malloc(tma, sizeof(*topology->support.cpubind)); topology->support.membind = hwloc_tma_malloc(tma, sizeof(*topology->support.membind)); - topology->nb_levels_allocated = nblevels; /* enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */ + topology->nb_levels_allocated = nblevels; /* enough for default 10 
levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */ topology->levels = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->levels)); topology->level_nbobjects = hwloc_tma_calloc(tma, topology->nb_levels_allocated * sizeof(*topology->level_nbobjects)); @@ -3455,7 +3556,7 @@ int hwloc_topology_init (struct hwloc_topology **topologyp) { return hwloc__topology_init(topologyp, - 16, /* 16 is enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */ + 16, /* 16 is enough for default 10 levels = Mach+Pack+Die+NUMA+L3+L2+L1d+L1i+Co+PU */ NULL); /* no TMA for normal topologies, too many allocations to fix */ } @@ -3705,6 +3806,8 @@ hwloc_topology_destroy (struct hwloc_topology *topology) int hwloc_topology_load (struct hwloc_topology *topology) { + struct hwloc_disc_status dstatus; + const char *env; int err; if (topology->is_loaded) { @@ -3760,6 +3863,14 @@ hwloc_topology_load (struct hwloc_topology *topology) } } + dstatus.excluded_phases = 0; + dstatus.flags = 0; /* did nothing yet */ + + env = getenv("HWLOC_ALLOW"); + if (env && !strcmp(env, "all")) + /* don't retrieve the sets of allowed resources */ + dstatus.flags |= HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES; + /* instantiate all possible other backends now */ hwloc_disc_components_enable_others(topology); /* now that backends are enabled, update the thissystem flag and some callbacks */ @@ -3774,7 +3885,7 @@ hwloc_topology_load (struct hwloc_topology *topology) hwloc_pci_discovery_prepare(topology); /* actual topology discovery */ - err = hwloc_discover(topology); + err = hwloc_discover(topology, &dstatus); if (err < 0) goto out; @@ -3796,6 +3907,12 @@ hwloc_topology_load (struct hwloc_topology *topology) hwloc_internal_distances_refresh(topology); topology->is_loaded = 1; + + if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) { + dstatus.phase = HWLOC_DISC_PHASE_TWEAK; + hwloc_discover_by_phase(topology, &dstatus, "TWEAK"); + } + return 0; out: @@ -4125,6 +4242,9 @@ 
hwloc_topology_allow(struct hwloc_topology *topology, goto error; } topology->binding_hooks.get_allowed_resources(topology); + /* make sure the backend returned something sane (Linux cpusets may return offline PUs in some cases) */ + hwloc_bitmap_and(topology->allowed_cpuset, topology->allowed_cpuset, hwloc_get_root_obj(topology)->cpuset); + hwloc_bitmap_and(topology->allowed_nodeset, topology->allowed_nodeset, hwloc_get_root_obj(topology)->nodeset); break; } case HWLOC_ALLOW_FLAG_CUSTOM: { @@ -4670,7 +4790,8 @@ hwloc_topology_check(struct hwloc_topology *topology) HWLOC_BUILD_ASSERT(HWLOC_OBJ_PCI_DEVICE + 1 == HWLOC_OBJ_OS_DEVICE); HWLOC_BUILD_ASSERT(HWLOC_OBJ_OS_DEVICE + 1 == HWLOC_OBJ_MISC); HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_MEMCACHE); - HWLOC_BUILD_ASSERT(HWLOC_OBJ_MEMCACHE + 1 == HWLOC_OBJ_TYPE_MAX); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_MEMCACHE + 1 == HWLOC_OBJ_DIE); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_DIE + 1 == HWLOC_OBJ_TYPE_MAX); /* make sure order and priority arrays have the right size */ HWLOC_BUILD_ASSERT(sizeof(obj_type_order)/sizeof(*obj_type_order) == HWLOC_OBJ_TYPE_MAX); diff --git a/hwloc/traversal.c b/hwloc/traversal.c index 80693d9f7a..0b744d7875 100644 --- a/hwloc/traversal.c +++ b/hwloc/traversal.c @@ -243,6 +243,7 @@ hwloc_obj_type_string (hwloc_obj_type_t obj) case HWLOC_OBJ_MEMCACHE: return "MemCache"; case HWLOC_OBJ_NUMANODE: return "NUMANode"; case HWLOC_OBJ_PACKAGE: return "Package"; + case HWLOC_OBJ_DIE: return "Die"; case HWLOC_OBJ_L1CACHE: return "L1Cache"; case HWLOC_OBJ_L2CACHE: return "L2Cache"; case HWLOC_OBJ_L3CACHE: return "L3Cache"; @@ -346,6 +347,8 @@ hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep, } else if (hwloc__type_match(string, "package", 2) || hwloc__type_match(string, "socket", 2)) { /* backward compat with v1.10 */ type = HWLOC_OBJ_PACKAGE; + } else if (hwloc__type_match(string, "die", 2)) { + type = HWLOC_OBJ_DIE; } else if (hwloc__type_match(string, "core", 2)) { type = HWLOC_OBJ_CORE; } else 
if (hwloc__type_match(string, "pu", 2)) { @@ -476,6 +479,7 @@ hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t case HWLOC_OBJ_NUMANODE: case HWLOC_OBJ_MEMCACHE: case HWLOC_OBJ_PACKAGE: + case HWLOC_OBJ_DIE: case HWLOC_OBJ_CORE: case HWLOC_OBJ_PU: return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type)); diff --git a/include/Makefile.am b/include/Makefile.am index 1b2017b4e7..3292ea7338 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,4 +1,4 @@ -# Copyright © 2009-2018 Inria. All rights reserved. +# Copyright © 2009-2019 Inria. All rights reserved. # Copyright © 2009-2010 Université Bordeaux # Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. # Copyright © 2011 Oracle and/or its affiliates. All rights reserved. @@ -30,7 +30,6 @@ include_hwloc_HEADERS = \ hwloc/nvml.h \ hwloc/plugins.h \ hwloc/gl.h \ - hwloc/intel-mic.h \ hwloc/rename.h \ hwloc/deprecated.h include_hwloc_autogendir = $(includedir)/hwloc/autogen diff --git a/include/hwloc.h b/include/hwloc.h index bc51aa2b40..e106e9cc0c 100644 --- a/include/hwloc.h +++ b/include/hwloc.h @@ -87,7 +87,7 @@ extern "C" { * actually modifies the API. * * Users may check for available features at build time using this number - * (see \ref faq_upgrade). + * (see \ref faq_version_api). * * \note This should not be confused with HWLOC_VERSION, the library version. * Two stable releases of the same series usually have the same ::HWLOC_API_VERSION @@ -187,7 +187,8 @@ typedef enum { HWLOC_OBJ_PACKAGE, /**< \brief Physical package. * The physical package that usually gets inserted * into a socket on the motherboard. - * A processor package usually contains multiple cores. + * A processor package usually contains multiple cores, + * and possibly some dies. */ HWLOC_OBJ_CORE, /**< \brief Core. * A computation unit (may be shared by several @@ -297,6 +298,11 @@ typedef enum { * main tree. */ + HWLOC_OBJ_DIE, /**< \brief Die within a physical package. 
+ * A subpart of the physical package, that contains multiple cores. + * \hideinitializer + */ + HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */ } hwloc_obj_type_t; @@ -604,6 +610,7 @@ union hwloc_obj_attr_u { * It may change if intermediate Group objects are added. */ unsigned kind; /**< \brief Internally-used kind of group. */ unsigned subkind; /**< \brief Internally-used subkind to distinguish different levels of groups with same kind */ + unsigned char dont_merge; /**< \brief Flag preventing groups from being automatically merged with identical parent or children. */ } group; /** \brief PCI Device specific Object Attributes */ struct hwloc_pcidev_attr_s { @@ -1819,6 +1826,9 @@ enum hwloc_topology_components_flag_e { * \p name is the name of the discovery component that should not be used * when loading topology \p topology. The name is a string such as "cuda". * + * For components with multiple phases, it may also be suffixed with the name + * of a phase, for instance "linux:io". + * * \p flags should be ::HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST. * * This may be used to avoid expensive parts of the discovery process. @@ -2047,7 +2057,7 @@ HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(h * * By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL). * Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE). - * Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). + * Die and Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE). * * Note that group objects are also ignored individually (without the entire level) * when they do not bring structure. @@ -2306,6 +2316,9 @@ HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_alloc_group_object(hwloc_topology_t to * the final location of the Group in the topology. * Then the object can be passed to this function for actual insertion in the topology. 
* + * The group \p dont_merge attribute may be set to prevent the core from + * ever merging this object with another object hierarchically-identical. + * * Either the cpuset or nodeset field (or both, if compatible) must be set * to a non-empty bitmap. The complete_cpuset or complete_nodeset may be set * instead if inserting with respect to the complete topology diff --git a/include/hwloc/distances.h b/include/hwloc/distances.h index d523f29fc8..b7baed8a4b 100644 --- a/include/hwloc/distances.h +++ b/include/hwloc/distances.h @@ -87,7 +87,12 @@ enum hwloc_distances_kind_e { * Such values are currently ignored for distance-based grouping. * \hideinitializer */ - HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3) + HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3), + + /** \brief This distances structure covers objects of different types. + * \hideinitializer + */ + HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4) }; /** \brief Retrieve distance matrices. @@ -131,20 +136,32 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, * * Identical to hwloc_distances_get() with the additional \p type filter. */ -static __hwloc_inline int +HWLOC_DECLSPEC int hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned *nr, struct hwloc_distances_s **distances, - unsigned long kind, unsigned long flags) -{ - int depth = hwloc_get_type_depth(topology, type); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) { - *nr = 0; - return 0; - } - return hwloc_distances_get_by_depth(topology, depth, nr, distances, kind, flags); -} + unsigned long kind, unsigned long flags); -/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). */ +/** \brief Retrieve a distance matrix with the given name. + * + * Usually only one distances structure may match a given name. 
+ */ +HWLOC_DECLSPEC int +hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, + unsigned *nr, struct hwloc_distances_s **distances, + unsigned long flags); + +/** \brief Get a description of what a distances structure contains. + * + * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT), + * or NULL if unknown. + */ +HWLOC_DECLSPEC const char * +hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances); + +/** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). + * + * \note This function is not required if the structure is removed with hwloc_distances_release_remove(). + */ HWLOC_DECLSPEC void hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances); @@ -221,11 +238,11 @@ enum hwloc_distances_add_flag_e { * The distance from object i to object j is in slot i*nbobjs+j. * * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. + * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically added + * if objects of different types are given. * * \p flags configures the behavior of the function using an optional OR'ed set of * ::hwloc_distances_add_flag_e. - * - * Objects must be of the same type. They cannot be of type Group. */ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, @@ -237,7 +254,7 @@ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology, * gathered through the OS. * * If these distances were used to group objects, these additional - *Group objects are not removed from the topology. + * Group objects are not removed from the topology. 
*/ HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology); @@ -260,6 +277,12 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) return hwloc_distances_remove_by_depth(topology, depth); } +/** \brief Release and remove the given distance matrix from the topology. + * + * This function includes a call to hwloc_distances_release(). + */ +HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances); + /** @} */ diff --git a/include/hwloc/helper.h b/include/hwloc/helper.h index 765de1ca14..2fa24d728a 100644 --- a/include/hwloc/helper.h +++ b/include/hwloc/helper.h @@ -803,7 +803,13 @@ enum hwloc_distrib_flags_e { /** \brief Distrib in reverse order, starting from the last objects. * \hideinitializer */ - HWLOC_DISTRIB_FLAG_REVERSE = (1UL<<0) + HWLOC_DISTRIB_FLAG_REVERSE = (1UL<<0), + /** \brief Distrib in shuffled order. This flag preserves the iteration policy. + * See hwloc_distrib_build_iterator(). Does not work with hwloc_distrib(). + * \hideinitializer + */ + HWLOC_DISTRIB_FLAG_SHUFFLE = (1UL<<1) + }; /** \brief Distribute \p n items over the topology under \p roots @@ -920,7 +926,7 @@ hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_ * \note The returned cpuset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object CPU-set. */ HWLOC_DECLSPEC hwloc_const_cpuset_t hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -951,7 +957,7 @@ hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_p * \note The returned nodeset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. 
* - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object complete nodeset. */ HWLOC_DECLSPEC hwloc_const_nodeset_t hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -965,7 +971,7 @@ hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute * \note The returned nodeset is not newly allocated and should thus not be * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy. * - * \note This is equivalent to retrieving the root object complete CPU-set. + * \note This is equivalent to retrieving the root object nodeset. */ HWLOC_DECLSPEC hwloc_const_nodeset_t hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure; @@ -1154,6 +1160,82 @@ hwloc_bridge_covers_pcibus(hwloc_obj_t bridge, && bridge->attr->bridge.downstream.pci.subordinate_bus >= bus; } +/** Iterator of topology objects **/ +struct hwloc_distrib_iterator; + +/** + * Create a round-robin iterator of the whole topology on a specific type of resource. + * \p type must have a positive depth and a cpuset. + * Use HWLOC_DISTRIB_FLAG_REVERSE \p flags to iterate in a reversed round-robin fashion. + * Use HWLOC_DISTRIB_FLAG_SHUFFLE \p flags for a random iterator. + **/ +HWLOC_DECLSPEC struct hwloc_distrib_iterator * +hwloc_distrib_iterator_round_robin(hwloc_topology_t topology, + const hwloc_obj_type_t type, + const unsigned long flags); + +/** + * Create a scatter iterator of the whole topology on a specific type of resources. + * Objects are iterated such that consecutive elements are as far as possible from the + * previously iterated objects. + * \p type must have a positive depth and a cpuset. + * Use HWLOC_DISTRIB_FLAG_REVERSE \p flags to iterate in a reversed scatter fashion. + * Use HWLOC_DISTRIB_FLAG_SHUFFLE \p flags to iterate in a randomized scatter fashion. 
+ **/ +HWLOC_DECLSPEC struct hwloc_distrib_iterator * +hwloc_distrib_iterator_scatter(hwloc_topology_t topology, + const hwloc_obj_type_t type, + const unsigned long flags); + +/** + * \brief Build a custom iterator to iterate over topology objects. + * + * This is the generic method for building iterators. + * Distributed objects are the deepest objects found in \p levels, + * below \p roots. + * On each iteration, \p it makes one step on the topology in the order of \p + * levels and outputs a new object. + * Round-robin iterator is built by providing levels in ascending depth. + * Scatter iterator is built by providing levels in descending depth. + * + * \p topology: The topology used to distribute objects. + * \p roots: A set of topology objects below which are distributed objects. + * This is used to subset the topology. + * \p n_roots: The number of root objects. + * \p levels: The levels used to enumerate objects. The deepest level is used + * to output objects. The order of levels defines the distribution policy. + * \p n_levels: The number of levels. + * \p flags: Additional flags used to distribute objects. See hwloc_distrib_flags_e. + * + * All \p levels objects and \p roots must have a valid cpuset. + * All \p levels must have a single positive depth in the \p topology. + * All \p levels must have at least one object below \p roots. + * + * This function returns a new iterator populated on success. + * On failure, NULL is returned and the error is printed to stderr. 
+ **/ +HWLOC_DECLSPEC struct hwloc_distrib_iterator * +hwloc_distrib_build_iterator(hwloc_topology_t topology, + hwloc_obj_t *roots, + const unsigned n_roots, + const hwloc_obj_type_t *levels, + const unsigned n_levels, + const unsigned long flags); + +/** \brief Free memory allocated for \p it **/ +HWLOC_DECLSPEC void hwloc_distrib_destroy_iterator(struct hwloc_distrib_iterator *it); + +/** + * On each iteration, \p it makes one step on the topology in the order of levels + * (see hwloc_distrib_build_iterator()) and output a new object in \p next. + * This iterator cycles and return 0 if the end of a cycle has been reached, else 1. + * Topology must be the same topology as the one used to create the iterator. + **/ +HWLOC_DECLSPEC int hwloc_distrib_iterator_next(hwloc_topology_t topology, + struct hwloc_distrib_iterator *it, + hwloc_obj_t *next); + + /** @} */ diff --git a/include/hwloc/intel-mic.h b/include/hwloc/intel-mic.h deleted file mode 100644 index c504cd7e02..0000000000 --- a/include/hwloc/intel-mic.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright © 2013-2016 Inria. All rights reserved. - * See COPYING in top-level directory. - */ - -/** \file - * \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC). - * - * Applications that use both hwloc and Intel Xeon Phi (MIC) may want to - * include this file so as to get topology information for MIC devices. - */ - -#ifndef HWLOC_INTEL_MIC_H -#define HWLOC_INTEL_MIC_H - -#include "hwloc.h" -#include "hwloc/autogen/config.h" -#include "hwloc/helper.h" - -#ifdef HWLOC_LINUX_SYS -#include "hwloc/linux.h" - -#include -#include -#endif - -#include -#include - - -#ifdef __cplusplus -extern "C" { -#endif - - -/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC) - * - * This interface offers ways to retrieve topology information about - * Intel Xeon Phi (MIC) devices. 
- * - * @{ - */ - -/** \brief Get the CPU set of logical processors that are physically - * close to MIC device whose index is \p idx. - * - * Return the CPU set describing the locality of the MIC device whose index is \p idx. - * - * Topology \p topology and device index \p idx must match the local machine. - * I/O devices detection is not needed in the topology. - * - * The function only returns the locality of the device. - * If more information about the device is needed, OS objects should - * be used instead, see hwloc_intel_mic_get_device_osdev_by_index(). - * - * This function is currently only implemented in a meaningful way for - * Linux; other systems will simply get a full cpuset. - */ -static __hwloc_inline int -hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, - int idx __hwloc_attribute_unused, - hwloc_cpuset_t set) -{ -#ifdef HWLOC_LINUX_SYS - /* If we're on Linux, use the sysfs mechanism to get the local cpus */ -#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128 - char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX]; - DIR *sysdir = NULL; - struct dirent *dirent; - unsigned pcibus, pcidev, pcifunc; - - if (!hwloc_topology_is_thissystem(topology)) { - errno = EINVAL; - return -1; - } - - sprintf(path, "/sys/class/mic/mic%d", idx); - sysdir = opendir(path); - if (!sysdir) - return -1; - - while ((dirent = readdir(sysdir)) != NULL) { - if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) == 3) { - sprintf(path, "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus", idx, pcibus, pcidev, pcifunc); - if (hwloc_linux_read_path_as_cpumask(path, set) < 0 - || hwloc_bitmap_iszero(set)) - hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); - break; - } - } - - closedir(sysdir); -#else - /* Non-Linux systems simply get a full cpuset */ - hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)); -#endif - return 0; -} - -/** \brief Get the hwloc OS device object corresponding to 
the - * MIC device for the given index. - * - * Return the OS device object describing the MIC device whose index is \p idx. - * Return NULL if there is none. - * - * The topology \p topology does not necessarily have to match the current - * machine. For instance the topology may be an XML import of a remote host. - * I/O devices detection must be enabled in the topology. - * - * \note The corresponding PCI device object can be obtained by looking - * at the OS device parent object. - */ -static __hwloc_inline hwloc_obj_t -hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology, - unsigned idx) -{ - hwloc_obj_t osdev = NULL; - while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) { - if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type - && osdev->name - && !strncmp("mic", osdev->name, 3) - && atoi(osdev->name + 3) == (int) idx) - return osdev; - } - return NULL; -} - -/** @} */ - - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* HWLOC_INTEL_MIC_H */ diff --git a/include/hwloc/plugins.h b/include/hwloc/plugins.h index 4869caa003..0f53ac4d4e 100644 --- a/include/hwloc/plugins.h +++ b/include/hwloc/plugins.h @@ -26,52 +26,36 @@ struct hwloc_backend; * @{ */ -/** \brief Discovery component type */ -typedef enum hwloc_disc_component_type_e { - /** \brief CPU-only discovery through the OS, or generic no-OS support. - * \hideinitializer */ - HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0), - - /** \brief xml or synthetic, - * platform-specific components such as bgq. - * Anything the discovers CPU and everything else. - * No misc backend is expected to complement a global component. - * \hideinitializer */ - HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1), - - /** \brief OpenCL, Cuda, etc. - * \hideinitializer */ - HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2) -} hwloc_disc_component_type_t; - /** \brief Discovery component structure * * This is the major kind of components, taking care of the discovery. 
* They are registered by generic components, either statically-built or as plugins. */ struct hwloc_disc_component { - /** \brief Discovery component type */ - hwloc_disc_component_type_t type; - /** \brief Name. * If this component is built as a plugin, this name does not have to match the plugin filename. */ const char *name; - /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e. + /** \brief Discovery phases performed by this component. + * OR'ed set of ::hwloc_disc_phase_t + */ + unsigned phases; + + /** \brief Component phases to exclude, as an OR'ed set of ::hwloc_disc_phase_t. * - * For a GLOBAL component, this usually includes all other types (~0). + * For a GLOBAL component, this usually includes all other phases (\c ~UL). * * Other components only exclude types that may bring conflicting * topology information. MISC components should likely not be excluded * since they usually bring non-primary additional information. */ - unsigned excludes; + unsigned excluded_phases; /** \brief Instantiate callback to create a backend from the component. * Parameters data1, data2, data3 are NULL except for components * that have special enabling routines such as hwloc_topology_set_xml(). */ - struct hwloc_backend * (*instantiate)(struct hwloc_topology *topology, struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3); + struct hwloc_backend * (*instantiate)(struct hwloc_topology *topology, struct hwloc_disc_component *component, unsigned excluded_phases, const void *data1, const void *data2, const void *data3); /** \brief Component priority. * Used to sort topology->components, higher priority first. @@ -108,10 +92,48 @@ struct hwloc_disc_component { * @{ */ +/** \brief Discovery phase */ +typedef enum hwloc_disc_phase_e { + /** \brief xml or synthetic, platform-specific components such as bgq. + * Discovers everything including CPU, memory, I/O and everything else. 
+ * A component with a Global phase usually excludes all other phases. + * \hideinitializer */ + HWLOC_DISC_PHASE_GLOBAL = (1U<<0), + + /** \brief CPU discovery. + * \hideinitializer */ + HWLOC_DISC_PHASE_CPU = (1U<<1), + + /** \brief Attach memory to existing CPU objects. + * \hideinitializer */ + HWLOC_DISC_PHASE_MEMORY = (1U<<2), + + /** \brief Attach PCI devices and bridges to existing CPU objects. + * \hideinitializer */ + HWLOC_DISC_PHASE_PCI = (1U<<3), + + /** \brief I/O discovery that requires PCI devices (OS devices such as OpenCL, CUDA, etc.). + * \hideinitializer */ + HWLOC_DISC_PHASE_IO = (1U<<4), + + /** \brief Misc objects that gets added below anything else. + * \hideinitializer */ + HWLOC_DISC_PHASE_MISC = (1U<<5), + + /** \brief Annotating existing objects, adding distances, etc. + * \hideinitializer */ + HWLOC_DISC_PHASE_ANNOTATE = (1U<<6), + + /** \brief Final tweaks to a ready-to-use topology. + * This phase runs once the topology is loaded, before it is returned to the topology. + * Hence it may only use the main hwloc API for modifying the topology, + * for instance by restricting it, adding info attributes, etc. + * \hideinitializer */ + HWLOC_DISC_PHASE_TWEAK = (1U<<7) +} hwloc_disc_phase_t; + /** \brief Discovery status flags */ enum hwloc_disc_status_flag_e { - /** \brief PCI discovery has been performed \hideinitializer */ - HWLOC_DISC_STATUS_FLAG_PCI_DONE = (1UL<<0), /** \brief The sets of allowed resources were already retrieved \hideinitializer */ HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES = (1UL<<1) }; @@ -122,6 +144,16 @@ enum hwloc_disc_status_flag_e { * during the discovery process. */ struct hwloc_disc_status { + /** \brief The current discovery phase that is performed. + * Must match one of the phases in the component phases field. + */ + hwloc_disc_phase_t phase; + + /** \brief Dynamically excluded phases. + * If a component decides during discovery that some phases are no longer needed. 
+ */ + unsigned excluded_phases; + /** \brief OR'ed set of hwloc_disc_status_flag_e */ unsigned long flags; }; @@ -154,6 +186,11 @@ struct hwloc_backend { /** \private Reserved for the core. Used internally to list backends topology->backends. */ struct hwloc_backend * next; + /** \brief Discovery phases performed by this component, possibly without some of them if excluded by other components. + * OR'ed set of ::hwloc_disc_phase_t + */ + unsigned phases; + /** \brief Backend flags, currently always 0. */ unsigned long flags; diff --git a/include/hwloc/rename.h b/include/hwloc/rename.h index c24a5616cd..3eafea7278 100644 --- a/include/hwloc/rename.h +++ b/include/hwloc/rename.h @@ -28,6 +28,7 @@ extern "C" { #define HWLOC_MUNGE_NAME(a, b) HWLOC_MUNGE_NAME2(a, b) #define HWLOC_MUNGE_NAME2(a, b) a ## b #define HWLOC_NAME(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX, hwloc_ ## name) +/* FIXME: should be "HWLOC_ ## name" below, unchanged because it doesn't matter much and could break some embedders hacks */ #define HWLOC_NAME_CAPS(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX_CAPS, hwloc_ ## name) /* Now define all the "real" names to be the prefixed names. 
This @@ -51,6 +52,7 @@ extern "C" { #define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE) #define HWLOC_OBJ_MEMCACHE HWLOC_NAME_CAPS(OBJ_MEMCACHE) #define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE) +#define HWLOC_OBJ_DIE HWLOC_NAME_CAPS(OBJ_DIE) #define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE) #define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU) #define HWLOC_OBJ_L1CACHE HWLOC_NAME_CAPS(OBJ_L1CACHE) @@ -355,7 +357,14 @@ extern "C" { #define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type) #define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e) #define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE) +#define HWLOC_DISTRIB_FLAG_SHUFFLE HWLOC_NAME_CAPS(DISTRIB_FLAG_SHUFFLE) #define hwloc_distrib HWLOC_NAME(distrib) +#define hwloc_distrib_iterator_next HWLOC_NAME(distrib_iterator_next) +#define hwloc_distrib_destroy_iterator HWLOC_NAME(distrib_destroy_iterator) +#define hwloc_distrib_build_iterator HWLOC_NAME(distrib_build_iterator) +#define hwloc_distrib_iterator_scatter HWLOC_NAME(distrib_iterator_scatter) +#define hwloc_distrib_iterator_round_robin HWLOC_NAME(distrib_iterator_round_robin) +#define hwloc_distrib_iterator HWLOC_NAME(distrib_iterator) #define hwloc_alloc_membind_policy HWLOC_NAME(alloc_membind_policy) #define hwloc_alloc_membind_policy_nodeset HWLOC_NAME(alloc_membind_policy_nodeset) #define hwloc_topology_get_complete_cpuset HWLOC_NAME(topology_get_complete_cpuset) @@ -395,10 +404,13 @@ extern "C" { #define HWLOC_DISTANCES_KIND_FROM_USER HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_USER) #define HWLOC_DISTANCES_KIND_MEANS_LATENCY HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_LATENCY) #define HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_BANDWIDTH) +#define HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES HWLOC_NAME_CAPS(DISTANCES_KIND_HETEROGENEOUS_TYPES) #define hwloc_distances_get HWLOC_NAME(distances_get) #define hwloc_distances_get_by_depth HWLOC_NAME(distances_get_by_depth) #define 
hwloc_distances_get_by_type HWLOC_NAME(distances_get_by_type) +#define hwloc_distances_get_by_name HWLOC_NAME(distances_get_by_name) +#define hwloc_distances_get_name HWLOC_NAME(distances_get_name) #define hwloc_distances_release HWLOC_NAME(distances_release) #define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index) #define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values) @@ -411,6 +423,7 @@ extern "C" { #define hwloc_distances_remove HWLOC_NAME(distances_remove) #define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth) #define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type) +#define hwloc_distances_release_remove HWLOC_NAME(distances_release_remove) /* diff.h */ @@ -477,11 +490,6 @@ extern "C" { #define hwloc_ibv_get_device_osdev HWLOC_NAME(ibv_get_device_osdev) #define hwloc_ibv_get_device_osdev_by_name HWLOC_NAME(ibv_get_device_osdev_by_name) -/* intel-mic.h */ - -#define hwloc_intel_mic_get_device_cpuset HWLOC_NAME(intel_mic_get_device_cpuset) -#define hwloc_intel_mic_get_device_osdev_by_index HWLOC_NAME(intel_mic_get_device_osdev_by_index) - /* opencl.h */ #define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd) @@ -519,15 +527,19 @@ extern "C" { /* hwloc/plugins.h */ -#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e) -#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU) -#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL) -#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC) -#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t) +#define hwloc_disc_phase_e HWLOC_NAME(disc_phase_e) +#define HWLOC_DISC_PHASE_GLOBAL HWLOC_NAME_CAPS(DISC_PHASE_GLOBAL) +#define HWLOC_DISC_PHASE_CPU HWLOC_NAME_CAPS(DISC_PHASE_CPU) +#define HWLOC_DISC_PHASE_MEMORY HWLOC_NAME_CAPS(DISC_PHASE_MEMORY) +#define HWLOC_DISC_PHASE_PCI HWLOC_NAME_CAPS(DISC_PHASE_PCI) +#define 
HWLOC_DISC_PHASE_IO HWLOC_NAME_CAPS(DISC_PHASE_IO) +#define HWLOC_DISC_PHASE_MISC HWLOC_NAME_CAPS(DISC_PHASE_MISC) +#define HWLOC_DISC_PHASE_ANNOTATE HWLOC_NAME_CAPS(DISC_PHASE_ANNOTATE) +#define HWLOC_DISC_PHASE_TWEAK HWLOC_NAME_CAPS(DISC_PHASE_TWEAK) +#define hwloc_disc_phase_t HWLOC_NAME(disc_phase_t) #define hwloc_disc_component HWLOC_NAME(disc_component) #define hwloc_disc_status_flag_e HWLOC_NAME(disc_status_flag_e) -#define HWLOC_DISC_STATUS_FLAG_PCI_DONE HWLOC_NAME_CAPS(DISC_STATUS_FLAG_PCI_DONE) #define HWLOC_DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES HWLOC_NAME_CAPS(DISC_STATUS_FLAG_GOT_ALLOWED_RESOURCES) #define hwloc_disc_status HWLOC_NAME(disc_status) @@ -679,7 +691,6 @@ extern "C" { #define hwloc_cuda_component HWLOC_NAME(cuda_component) #define hwloc_gl_component HWLOC_NAME(gl_component) -#define hwloc_linuxio_component HWLOC_NAME(linuxio_component) #define hwloc_nvml_component HWLOC_NAME(nvml_component) #define hwloc_opencl_component HWLOC_NAME(opencl_component) #define hwloc_pci_component HWLOC_NAME(pci_component) diff --git a/include/private/internal-components.h b/include/private/internal-components.h index b138a0eb9d..d3c897836b 100644 --- a/include/private/internal-components.h +++ b/include/private/internal-components.h @@ -1,5 +1,5 @@ /* - * Copyright © 2018 Inria. All rights reserved. + * Copyright © 2018-2019 Inria. All rights reserved. * * See COPYING in top-level directory. 
*/ @@ -29,7 +29,6 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component; /* I/O discovery */ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component; -HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linuxio_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component; diff --git a/include/private/misc.h b/include/private/misc.h index 4522df08ea..6c02d793bb 100644 --- a/include/private/misc.h +++ b/include/private/misc.h @@ -439,7 +439,7 @@ hwloc_linux_pci_link_speed_from_string(const char *string) static __hwloc_inline int hwloc__obj_type_is_normal (hwloc_obj_type_t type) { /* type contiguity is asserted in topology_check() */ - return type <= HWLOC_OBJ_GROUP; + return type <= HWLOC_OBJ_GROUP || type == HWLOC_OBJ_DIE; } /* Any object attached to memory children, currently NUMA nodes or Memory-side caches */ diff --git a/include/private/private.h b/include/private/private.h index 9c7fa5629b..5f8789376b 100644 --- a/include/private/private.h +++ b/include/private/private.h @@ -130,7 +130,18 @@ struct hwloc_topology { int userdata_not_decoded; struct hwloc_internal_distances_s { - hwloc_obj_type_t type; + char *name; /* FIXME: needs an API to set it from user */ + + unsigned id; /* to match the container id field of public distances structure + * not exported to XML, regenerated during _add() + */ + + /* if all objects have the same type, different_types is NULL and unique_type is valid. + * otherwise unique_type is HWLOC_OBJ_TYPE_NONE and different_types contains individual objects types. 
+ */ + hwloc_obj_type_t unique_type; + hwloc_obj_type_t *different_types; + /* add union hwloc_obj_attr_u if we ever support groups */ unsigned nbobjs; uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. @@ -142,11 +153,12 @@ struct hwloc_topology { */ unsigned long kind; +#define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */ + unsigned iflags; + /* objects are currently stored in physical_index order */ hwloc_obj_t *objs; /* array of objects */ - int objs_are_valid; /* set to 1 if the array objs is still valid, 0 if needs refresh */ - unsigned id; /* to match the container id field of public distances structure */ struct hwloc_internal_distances_s *prev, *next; } *first_dist, *last_dist; unsigned next_dist_id; @@ -160,7 +172,8 @@ struct hwloc_topology { /* list of enabled backends. */ struct hwloc_backend * backends; struct hwloc_backend * get_pci_busid_cpuset_backend; /* first backend that provides get_pci_busid_cpuset() callback */ - unsigned backend_excludes; + unsigned backend_phases; + unsigned backend_excluded_phases; /* memory allocator for topology objects */ struct hwloc_tma * tma; @@ -194,6 +207,7 @@ struct hwloc_topology { unsigned nr_blacklisted_components; struct hwloc_topology_forced_component_s { struct hwloc_disc_component *component; + unsigned phases; } *blacklisted_components; /* FIXME: keep until topo destroy and reuse for finding specific buses */ @@ -334,8 +348,8 @@ extern void hwloc_internal_distances_prepare(hwloc_topology_t topology); extern void hwloc_internal_distances_destroy(hwloc_topology_t topology); extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old); extern void hwloc_internal_distances_refresh(hwloc_topology_t topology); -extern int hwloc_internal_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); -extern int 
hwloc_internal_distances_add_by_index(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); +extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags); +extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags); extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology); /* encode src buffer into target buffer. diff --git a/tests/hwloc/Makefile.am b/tests/hwloc/Makefile.am index 1d896ecef0..0ab9a35004 100644 --- a/tests/hwloc/Makefile.am +++ b/tests/hwloc/Makefile.am @@ -1,4 +1,4 @@ -# Copyright © 2009-2018 Inria. All rights reserved. +# Copyright © 2009-2019 Inria. All rights reserved. # Copyright © 2009-2012 Université Bordeaux # Copyright © 2009-2014 Cisco Systems, Inc. All rights reserved. # See COPYING in top-level directory. @@ -8,7 +8,7 @@ AM_CPPFLAGS = $(HWLOC_CPPFLAGS) -DXMLTESTDIR=\"$(abs_top_srcdir)/tests/hwloc/xml AM_LDFLAGS = $(HWLOC_LDFLAGS) SUBDIRS = . 
ports xml -DIST_SUBDIRS = ports xml linux x86 rename +DIST_SUBDIRS = ports xml linux x86 x86+linux rename if HWLOC_HAVE_LINUX SUBDIRS += linux @@ -18,11 +18,20 @@ if HWLOC_HAVE_X86_CPUID SUBDIRS += x86 endif HWLOC_HAVE_X86_CPUID +if HWLOC_HAVE_LINUX +if HWLOC_HAVE_X86_CPUID +SUBDIRS += x86+linux +endif HWLOC_HAVE_X86_CPUID +endif HWLOC_HAVE_LINUX + LDADD = LOG_COMPILER = $(builddir)/wrapper.sh -check_PROGRAMS = hwloc_list_components \ +check_PROGRAMS = \ + hwloc_api_version \ + hwloc_list_components \ + hwloc_distrib \ hwloc_bitmap \ hwloc_bitmap_string \ hwloc_bitmap_compare_inclusion \ @@ -57,8 +66,7 @@ check_PROGRAMS = hwloc_list_components \ hwloc_obj_infos \ hwloc_iodevs \ xmlbuffer \ - gl \ - intel-mic + gl if !HWLOC_HAVE_WINDOWS if !HWLOC_HAVE_DARWIN diff --git a/tests/hwloc/hwloc_api_version.c b/tests/hwloc/hwloc_api_version.c new file mode 100644 index 0000000000..17d72ebb66 --- /dev/null +++ b/tests/hwloc/hwloc_api_version.c @@ -0,0 +1,16 @@ +/* + * Copyright © 2019 Inria. All rights reserved. + * See COPYING in top-level directory. + */ + +#include "hwloc.h" + +#include + +int main(void) +{ + unsigned static_version = HWLOC_API_VERSION; + unsigned dynamic_version = hwloc_get_api_version(); + assert(static_version == dynamic_version); + return 0; +} diff --git a/tests/hwloc/hwloc_backends.c b/tests/hwloc/hwloc_backends.c index 08ff1eb31c..30d572b3ac 100644 --- a/tests/hwloc/hwloc_backends.c +++ b/tests/hwloc/hwloc_backends.c @@ -1,5 +1,5 @@ /* - * Copyright © 2012-2018 Inria. All rights reserved. + * Copyright © 2012-2019 Inria. All rights reserved. * See COPYING in top-level directory. 
*/ @@ -211,7 +211,9 @@ int main(void) assert(errno == EINVAL); #endif #ifdef HWLOC_LINUX_SYS - err = hwloc_topology_set_components(topology1, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "linux"); + err = hwloc_topology_set_components(topology1, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "linux:0xf"); + assert(!err); + err = hwloc_topology_set_components(topology1, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "linux:0xfffffff0"); assert(!err); #endif #ifdef HWLOC_BGQ_SYS diff --git a/tests/hwloc/hwloc_distances.c b/tests/hwloc/hwloc_distances.c index bfe9a005ab..b4af1fff15 100644 --- a/tests/hwloc/hwloc_distances.c +++ b/tests/hwloc/hwloc_distances.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2017 Inria. All rights reserved. + * Copyright © 2010-2019 Inria. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -52,9 +52,11 @@ static void check_distances(hwloc_topology_t topology, int depth, unsigned expec assert(!err); printf("distance matrix for depth %d:\n", depth); print_distances(distances[0]); + assert(!hwloc_distances_get_name(topology, distances[0])); hwloc_distances_release(topology, distances[0]); if (nr > 1) { print_distances(distances[1]); + assert(!hwloc_distances_get_name(topology, distances[1])); hwloc_distances_release(topology, distances[1]); } } @@ -233,12 +235,35 @@ int main(void) hwloc_distances_release(topology, distances[0]); hwloc_distances_release(topology, distances[1]); + /* inserting heterogeneous distance */ + printf("\nInserting heterogeneous distances\n"); + objs[0] = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); + objs[1] = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 1); + objs[2] = hwloc_get_obj_by_depth(topology, 1, 0); + for(i=0; i<3; i++) { + for(j=0; j<3; j++) + values[i*3+j] = 10; + values[i*3+i] = 5; + } + err = hwloc_distances_add(topology, 3, objs, values, + HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH|HWLOC_DISTANCES_KIND_FROM_USER, + 0); + assert(!err); + /* 
check distances by kind */ nr = 2; err = hwloc_distances_get(topology, &nr, distances, HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH, 0); assert(!err); - assert(nr == 1); + assert(nr == 2); hwloc_distances_release(topology, distances[0]); + assert(distances[1]->objs[0]->type == HWLOC_OBJ_NUMANODE); + assert(distances[1]->objs[0]->logical_index == 0); + assert(distances[1]->objs[1]->type == HWLOC_OBJ_CORE); + assert(distances[1]->objs[1]->logical_index == 1); + assert(distances[1]->objs[2]->type == HWLOC_OBJ_GROUP); + assert(distances[1]->objs[2]->logical_index == 0); + assert(distances[1]->kind == (HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH|HWLOC_DISTANCES_KIND_FROM_USER|HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES)); + hwloc_distances_release(topology, distances[1]); nr = 2; err = hwloc_distances_get(topology, &nr, distances, HWLOC_DISTANCES_KIND_MEANS_LATENCY|HWLOC_DISTANCES_KIND_FROM_OS, 0); assert(!err); @@ -250,6 +275,29 @@ int main(void) hwloc_distances_release(topology, distances[0]); hwloc_distances_release(topology, distances[1]); + /* check distances by name */ + nr = 0; + err = hwloc_distances_get_by_name(topology, NULL, &nr, distances, 0); + assert(!err); + assert(nr == 4); + nr = 0; + err = hwloc_distances_get_by_name(topology, "nomatch", &nr, distances, 0); + assert(!err); + assert(nr == 0); + + /* removing one PU distance */ + printf("Removing the 2nd PU distances\n"); + nr = 2; + err = hwloc_distances_get_by_type(topology, HWLOC_OBJ_PU, &nr, distances, 0, 0); + assert(!err); + assert(nr == 2); + hwloc_distances_release(topology, distances[0]); + hwloc_distances_release_remove(topology, distances[1]); + nr = 0; + err = hwloc_distances_get_by_type(topology, HWLOC_OBJ_PU, &nr, distances, 0, 0); + assert(!err); + assert(nr == 1); + /* remove distances */ printf("Removing distances\n"); /* remove both PU distances */ diff --git a/tests/hwloc/hwloc_distrib.c b/tests/hwloc/hwloc_distrib.c new file mode 100644 index 0000000000..344f07f291 --- /dev/null +++ 
b/tests/hwloc/hwloc_distrib.c @@ -0,0 +1,240 @@ +/*************************************************************************** + * Copyright 2019 UChicago Argonne, LLC. + * Author: Nicolas Denoyelle + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING in top-level directory. +****************************************************************************/ + +#include +#include +#include +#include "private/autogen/config.h" +#include "hwloc.h" +#include "hwloc/helper.h" + +static hwloc_topology_t hwloc_test_topology_load(const char *file) +{ + hwloc_topology_t topology; + + if (hwloc_topology_init(&topology)) { + perror("hwloc_topology_init"); + goto error; + } + + if (file != NULL && hwloc_topology_set_xml(topology, file) != 0) { + perror("hwloc_topology_set_xml"); + goto error; + } + + hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PU, + HWLOC_TYPE_FILTER_KEEP_ALL); + hwloc_topology_set_type_filter(topology, HWLOC_OBJ_NUMANODE, + HWLOC_TYPE_FILTER_KEEP_ALL); + hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, + HWLOC_TYPE_FILTER_KEEP_NONE); + hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, + HWLOC_TYPE_FILTER_KEEP_NONE); + + if (hwloc_topology_load(topology) != 0) { + perror("hwloc_topology_load"); + goto error_with_topology; + } + + return topology; + +error_with_topology: + hwloc_topology_destroy(topology); +error: + return NULL; +} + +static int is_tleaf(hwloc_topology_t topology) +{ + hwloc_obj_t next, obj = hwloc_get_obj_by_depth(topology, 0, 0); + while(obj){ + next = obj->next_cousin; + while(next){ + if(next->arity != obj->arity) + return 0; + next = next->next_cousin; + } + obj = obj->first_child; + } + return 1; +} + +static void test_round_robin_PU(hwloc_topology_t topology) +{ + int i,nPU = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); + struct hwloc_distrib_iterator *it; + hwloc_obj_t PU, item; + + it = hwloc_distrib_iterator_round_robin(topology, HWLOC_OBJ_PU, 0); + assert(it != NULL); + + for(i=0; icpuset, + 
HWLOC_OBJ_PU, i/ncore); + assert(PU == item); + } + hwloc_distrib_destroy_iterator(it); +} + +static void test_scatter(hwloc_topology_t topology) +{ + // This test works only if topology is a tleaf. + if ( !is_tleaf(topology) ) + return; + + ssize_t i=0, j, r, n_levels=0, n=0, c, val, nleaves=1; + hwloc_obj_t item, obj, root = hwloc_get_obj_by_depth(topology, 0, 0); + struct hwloc_distrib_iterator *it; + + it = hwloc_distrib_iterator_scatter(topology, HWLOC_OBJ_PU, 0); + assert(it != NULL); + + obj = root; + while(obj){ + if ((obj->cpuset != NULL && !hwloc_bitmap_iszero(obj->cpuset)) && + hwloc_get_type_depth(topology, obj->type) >= 0) + n++; + obj = obj->first_child; + } + assert(n != 0); + + ssize_t arities[n]; + + item = obj = root; + next_level: + do { + obj = obj->first_child; + } while( obj != NULL && (obj->cpuset == NULL || + hwloc_bitmap_iszero(obj->cpuset) || + hwloc_get_type_depth(topology, obj->type) < 0)); + if(obj != NULL){ + arities[n-1-i] = hwloc_get_nbobjs_inside_cpuset_by_type(topology, + item->cpuset, + obj->type); + nleaves *= arities[n-1-i]; + i++; + item = obj; + goto next_level; + } + + n_levels=n; + for (i = 0; hwloc_distrib_iterator_next(topology, it, &item); i++) { + c = i; + n = nleaves; + val = 0; + for (j = (n_levels-1); j > 0; j--) { + r = c % arities[j]; + n = n / arities[j]; + c = c / arities[j]; + val += n * r; + } + assert(item->logical_index == val); + } + + hwloc_distrib_destroy_iterator(it); +} + +/***************************************************************************/ +/* Run Tests */ +/***************************************************************************/ + +int main(void) +{ + hwloc_topology_t topology; + DIR* xml_dir = opendir(XMLTESTDIR); + if(xml_dir == NULL){ + perror("opendir"); + return 1; + } + + char fname[512]; + struct dirent *dirent; + for(dirent = readdir(xml_dir); + dirent != NULL; + dirent = readdir(xml_dir)){ + // Not supported by solaris and not critical. 
+ /* if(dirent->d_type != DT_REG) */ + /* continue; */ + if(strcmp(dirent->d_name + strlen(dirent->d_name) - 4, + ".xml")) + continue; + memset(fname, 0, sizeof(fname)); + snprintf(fname, + sizeof(fname), + "%s/%s", + XMLTESTDIR, + dirent->d_name); + topology = hwloc_test_topology_load(fname); + if(topology == NULL) + continue; + test_round_robin_PU(topology); + test_reversed_round_robin_PU(topology); + test_round_robin_Core_PU(topology); + test_scatter(topology); + hwloc_topology_destroy(topology); + } + + closedir(xml_dir); + return 0; +} + diff --git a/tests/hwloc/hwloc_get_last_cpu_location.c b/tests/hwloc/hwloc_get_last_cpu_location.c index 0cb56d351f..362402462c 100644 --- a/tests/hwloc/hwloc_get_last_cpu_location.c +++ b/tests/hwloc/hwloc_get_last_cpu_location.c @@ -1,5 +1,5 @@ /* - * Copyright © 2011-2017 Inria. All rights reserved. + * Copyright © 2011-2019 Inria. All rights reserved. * Copyright © 2011 Université Bordeaux. All rights reserved. * See COPYING in top-level directory. */ @@ -79,7 +79,7 @@ int main(void) depth = hwloc_topology_get_depth(topology); /* check at intermediate level if it exists */ if (depth >= 3) { - printf("testing at depth %d\n", (depth-1)/2); + printf("testing at depth %u\n", (depth-1)/2); obj = NULL; while ((obj = hwloc_get_next_obj_by_depth(topology, (depth-1)/2, obj)) != NULL) checkall(obj->cpuset); diff --git a/tests/hwloc/hwloc_iodevs.c b/tests/hwloc/hwloc_iodevs.c index 7488ae14d8..a51082fd0d 100644 --- a/tests/hwloc/hwloc_iodevs.c +++ b/tests/hwloc/hwloc_iodevs.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2019 Inria. All rights reserved. * Copyright © Université Bordeaux * See COPYING in top-level directory. 
*/ @@ -56,7 +56,7 @@ int main(void) obj = NULL; while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) { assert(obj->type == HWLOC_OBJ_OS_DEVICE); - printf(" Found OS device %s subtype %d\n", obj->name, obj->attr->osdev.type); + printf(" Found OS device %s subtype %d\n", obj->name, (int) obj->attr->osdev.type); } assert(HWLOC_TYPE_DEPTH_BRIDGE == hwloc_get_type_depth(topology, HWLOC_OBJ_BRIDGE)); diff --git a/tests/hwloc/hwloc_topology_abi.c b/tests/hwloc/hwloc_topology_abi.c index 9164d26fd6..6774b003bf 100644 --- a/tests/hwloc/hwloc_topology_abi.c +++ b/tests/hwloc/hwloc_topology_abi.c @@ -36,42 +36,42 @@ int main(void) #if (defined HWLOC_LINUX_SYS) && (defined HWLOC_X86_64_ARCH) if (!getenv("HWLOC_IGNORE_TOPOLOGY_ABI")) { size_t size, offset __hwloc_attribute_unused; - printf("checking offsets and sizes in struct hwloc_topology for topology ABI 0x%x...\n", HWLOC_TOPOLOGY_ABI); + printf("checking offsets and sizes in struct hwloc_topology for topology ABI 0x%x...\n", (unsigned) HWLOC_TOPOLOGY_ABI); /******************************************************************* * WARNING: if anything breaks below, the topology ABI has changed. * HWLOC_TOPOLOGY_ABI must be bumped when updating these checks. 
*******************************************************************/ - HWLOC_BUILD_ASSERT(HWLOC_OBJ_TYPE_MAX == 19); + HWLOC_BUILD_ASSERT(HWLOC_OBJ_TYPE_MAX == 20); HWLOC_BUILD_ASSERT(HWLOC_NR_SLEVELS == 6); offset = offsetof(struct hwloc_topology, topology_abi); assert(offset == 0); offset = offsetof(struct hwloc_topology, adopted_shmem_addr); - assert(offset == 224); + assert(offset == 232); offset = offsetof(struct hwloc_topology, binding_hooks); - assert(offset == 448); + assert(offset == 456); size = sizeof(struct hwloc_binding_hooks); assert(size == 192); offset = offsetof(struct hwloc_topology, support); - assert(offset == 640); + assert(offset == 648); offset = offsetof(struct hwloc_topology, first_dist); - assert(offset == 688); + assert(offset == 696); size = sizeof(struct hwloc_internal_distances_s); - assert(size == 64); + assert(size == 88); offset = offsetof(struct hwloc_topology, grouping_next_subkind); - assert(offset == 740); + assert(offset == 748); /* fields after this one aren't needed after discovery */ /* check bitmap ABI too, but those fields are private to bitmap.c */ - printf("checking bitmaps for topology ABI 0x%x...\n", HWLOC_TOPOLOGY_ABI); + printf("checking bitmaps for topology ABI 0x%x...\n", (unsigned) HWLOC_TOPOLOGY_ABI); { hwloc_bitmap_t set = hwloc_bitmap_alloc(); unsigned *ulongs_count = (unsigned*) (((char*)set) ); diff --git a/tests/hwloc/intel-mic.c b/tests/hwloc/intel-mic.c deleted file mode 100644 index e4b6869c8b..0000000000 --- a/tests/hwloc/intel-mic.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright © 2013-2018 Inria. All rights reserved. - * See COPYING in top-level directory. 
- */ - -#include -#include - -#include "hwloc.h" -#include "hwloc/intel-mic.h" - -int main(void) -{ - hwloc_topology_t topology; - int i; - int err; - - hwloc_topology_init(&topology); - hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); - hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); - hwloc_topology_load(topology); - - for(i=0; ; i++) { - hwloc_bitmap_t set; - hwloc_obj_t osdev, ancestor; - const char *value; - - osdev = hwloc_intel_mic_get_device_osdev_by_index(topology, i); - if (!osdev) - break; - assert(osdev); - - ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev); - - printf("found OSDev %s\n", osdev->name); - err = strncmp(osdev->name, "mic", 3); - assert(!err); - assert(atoi(osdev->name+3) == (int) i); - - assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); - - value = osdev->subtype; - assert(value); - err = strcmp(value, "MIC"); - assert(!err); - - value = hwloc_obj_get_info_by_name(osdev, "MICFamily"); - printf("found MICFamily %s\n", value); - value = hwloc_obj_get_info_by_name(osdev, "MICSKU"); - printf("found MICSKU %s\n", value); - value = hwloc_obj_get_info_by_name(osdev, "MICActiveCores"); - printf("found MICActiveCores %s\n", value); - value = hwloc_obj_get_info_by_name(osdev, "MICMemorySize"); - printf("found MICMemorySize %s\n", value); - - set = hwloc_bitmap_alloc(); - err = hwloc_intel_mic_get_device_cpuset(topology, i, set); - if (err < 0) { - printf("failed to get cpuset for device %d\n", i); - } else { - char *cpuset_string = NULL; - hwloc_bitmap_asprintf(&cpuset_string, set); - printf("got cpuset %s for device %d\n", cpuset_string, i); - if (hwloc_bitmap_isequal(hwloc_topology_get_complete_cpuset(topology), hwloc_topology_get_topology_cpuset(topology))) - /* only compare if the topology is complete, otherwise things can be significantly different */ - assert(hwloc_bitmap_isequal(set, ancestor->cpuset)); - free(cpuset_string); - 
} - hwloc_bitmap_free(set); - } - - hwloc_topology_destroy(topology); - - return 0; -} diff --git a/tests/hwloc/linux/128ia64-17n4s2c.output b/tests/hwloc/linux/128ia64-17n4s2c.output index c5a8302959..a73f298f45 100644 --- a/tests/hwloc/linux/128ia64-17n4s2c.output +++ b/tests/hwloc/linux/128ia64-17n4s2c.output @@ -359,7 +359,7 @@ depth 0: 1 Machine (type #0) depth 3: 128 Core (type #2) depth 4: 128 PU (type #3) Special depth -3: 17 NUMANode (type #13) -Relative latency matrix (kind 5) between 17 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 17 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0 10 17 17 17 20 20 20 20 20 20 20 20 20 20 20 20 14 1 17 10 17 17 20 20 20 20 20 20 20 20 20 20 20 20 14 diff --git a/tests/hwloc/linux/16amd64-4n4c-cgroup-distance-merge.output b/tests/hwloc/linux/16amd64-4n4c-cgroup-distance-merge.output index 4e354d4e0c..6194137c4e 100644 --- a/tests/hwloc/linux/16amd64-4n4c-cgroup-distance-merge.output +++ b/tests/hwloc/linux/16amd64-4n4c-cgroup-distance-merge.output @@ -14,7 +14,7 @@ depth 0: 1 Machine (type #0) depth 2: 1 L3Cache (type #6) depth 3: 4 PU (type #3) Special depth -3: 2 NUMANode (type #13) -Relative latency matrix (kind 5) between 2 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 2 NUMANodes (depth -3) by logical indexes: index 0 1 0 10 20 1 20 10 diff --git a/tests/hwloc/linux/16amd64-8n2c-cpusets.output b/tests/hwloc/linux/16amd64-8n2c-cpusets.output index 98b588193f..a9ed018440 100644 --- a/tests/hwloc/linux/16amd64-8n2c-cpusets.output +++ b/tests/hwloc/linux/16amd64-8n2c-cpusets.output @@ -68,7 +68,7 @@ depth 0: 1 Machine (type #0) depth 5: 10 Core (type #2) depth 6: 10 PU (type #3) Special depth -3: 4 NUMANode (type #13) -Relative latency matrix (kind 5) between 4 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 4 NUMANodes 
(depth -3) by logical indexes: index 0 1 2 3 0 10 20 20 20 1 20 10 20 20 diff --git a/tests/hwloc/linux/16amd64-8n2c-cpusets.xml.output b/tests/hwloc/linux/16amd64-8n2c-cpusets.xml.output index 1d10e34e15..d37b651fed 100644 --- a/tests/hwloc/linux/16amd64-8n2c-cpusets.xml.output +++ b/tests/hwloc/linux/16amd64-8n2c-cpusets.xml.output @@ -112,7 +112,7 @@ - + 1 2 3 4 10 20 20 20 20 10 20 20 20 20 10 20 20 20 20 10 diff --git a/tests/hwloc/linux/16amd64-8n2c-cpusets_noadmin.output b/tests/hwloc/linux/16amd64-8n2c-cpusets_noadmin.output index 1dfb97d7e3..d3d83f9fec 100644 --- a/tests/hwloc/linux/16amd64-8n2c-cpusets_noadmin.output +++ b/tests/hwloc/linux/16amd64-8n2c-cpusets_noadmin.output @@ -98,7 +98,7 @@ depth 0: 1 Machine (type #0) depth 5: 15 Core (type #2) depth 6: 15 PU (type #3) Special depth -3: 8 NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 20 20 20 20 20 20 20 1 20 10 20 20 20 20 20 20 diff --git a/tests/hwloc/linux/16amd64-8n2c.output b/tests/hwloc/linux/16amd64-8n2c.output index af32d720d1..122bf963e0 100644 --- a/tests/hwloc/linux/16amd64-8n2c.output +++ b/tests/hwloc/linux/16amd64-8n2c.output @@ -103,7 +103,7 @@ depth 0: 1 Machine (type #0) depth 5: 16 Core (type #2) depth 6: 16 PU (type #3) Special depth -3: 8 NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 20 20 20 20 20 20 20 1 20 10 20 20 20 20 20 20 diff --git a/tests/hwloc/linux/16ia64-8n2s.output b/tests/hwloc/linux/16ia64-8n2s.output index f8df3038e8..d5d4bdfe51 100644 --- a/tests/hwloc/linux/16ia64-8n2s.output +++ b/tests/hwloc/linux/16ia64-8n2s.output @@ -140,7 +140,7 @@ depth 0: 1 Machine (type #0) depth 8: 16 Core (type #2) 
depth 9: 16 PU (type #3) Special depth -3: 8 NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 25 25 25 29 29 29 29 1 25 10 25 25 29 29 29 29 diff --git a/tests/hwloc/linux/256ia64-64n2s2c.output b/tests/hwloc/linux/256ia64-64n2s2c.output index 5489fef731..41bf38322c 100644 --- a/tests/hwloc/linux/256ia64-64n2s2c.output +++ b/tests/hwloc/linux/256ia64-64n2s2c.output @@ -795,7 +795,7 @@ depth 0: 1 Machine (type #0) depth 5: 256 Core (type #2) depth 6: 256 PU (type #3) Special depth -3: 64 NUMANode (type #13) -Relative latency matrix (kind 5) between 64 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 64 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 0 10 22 22 22 26 26 26 26 26 26 26 26 30 30 30 30 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 1 22 10 22 22 26 26 26 26 26 26 26 26 30 30 30 30 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 30 30 30 30 34 34 34 34 diff --git a/tests/hwloc/linux/256ppc-8n8s4t-nocache.output b/tests/hwloc/linux/256ppc-8n8s4t-nocache.output index 1b2bb07753..526b0e960c 100644 --- a/tests/hwloc/linux/256ppc-8n8s4t-nocache.output +++ b/tests/hwloc/linux/256ppc-8n8s4t-nocache.output @@ -670,7 +670,7 @@ depth 0: 1 Machine (type #0) depth 8: 64 Core (type #2) depth 9: 256 PU (type #3) Special depth -3: 8 NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 
8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 20 40 40 40 40 40 40 1 20 10 40 40 40 40 40 40 diff --git a/tests/hwloc/linux/256ppc-8n8s4t.output b/tests/hwloc/linux/256ppc-8n8s4t.output index 1b2bb07753..526b0e960c 100644 --- a/tests/hwloc/linux/256ppc-8n8s4t.output +++ b/tests/hwloc/linux/256ppc-8n8s4t.output @@ -670,7 +670,7 @@ depth 0: 1 Machine (type #0) depth 8: 64 Core (type #2) depth 9: 256 PU (type #3) Special depth -3: 8 NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 20 40 40 40 40 40 40 1 20 10 40 40 40 40 40 40 diff --git a/tests/hwloc/linux/28em64t-2s2n7c-buggycoresiblings.output b/tests/hwloc/linux/28em64t-2s2n7c-buggycoresiblings.output deleted file mode 100644 index 1c6dae6cc8..0000000000 --- a/tests/hwloc/linux/28em64t-2s2n7c-buggycoresiblings.output +++ /dev/null @@ -1,167 +0,0 @@ -Machine (P#0 total=66835164KB DMIProductName=UCSB-B200-M4 DMIProductVersion=0 DMIProductSerial=FCH18187KYL DMIProductUUID=5C29B631-18D7-46D5-BBDB-E43F106A86D5 DMIBoardVendor="Cisco Systems Inc" DMIBoardName=UCSB-B200-M4 DMIBoardVersion=73-15862-03 DMIBoardSerial=FCH18187KYL DMIBoardAssetTag=" " DMIChassisVendor="Cisco Systems Inc" DMIChassisType=18 DMIChassisVersion=68-4777-02 DMIChassisSerial=FOX1802H2KC DMIChassisAssetTag=" " DMIBIOSVendor="Cisco Systems, Inc." 
DMIBIOSVersion=B200M4.2.2.3.0.080820140005 DMIBIOSDate=08/08/2014 DMISysVendor="Cisco Systems Inc" Backend=Linux LinuxCgroup=/ OSName=Linux OSRelease=3.10.0-123.el7.x86_64 OSVersion="#1 SMP Mon May 5 11:16:57 EDT 2014" HostName=localhost.localdomain Architecture=x86_64) - Package L#0 (P#0 total=33280732KB CPUVendor=GenuineIntel CPUFamilyNumber=6 CPUModelNumber=63 CPUModel="Intel(R) Xeon(R) CPU E5-2683 v3 @ 2.00GHz" CPUStepping=2) - L3Cache L#0 (total=16503516KB size=17920KB linesize=64 ways=20) - NUMANode L#0 (P#0 local=16503516KB total=16503516KB) - L2Cache L#0 (size=256KB linesize=64 ways=8) - L1dCache L#0 (size=32KB linesize=64 ways=8) - L1iCache L#0 (size=32KB linesize=64 ways=8) - Core L#0 (P#0) - PU L#0 (P#0) - L2Cache L#1 (size=256KB linesize=64 ways=8) - L1dCache L#1 (size=32KB linesize=64 ways=8) - L1iCache L#1 (size=32KB linesize=64 ways=8) - Core L#1 (P#1) - PU L#1 (P#1) - L2Cache L#2 (size=256KB linesize=64 ways=8) - L1dCache L#2 (size=32KB linesize=64 ways=8) - L1iCache L#2 (size=32KB linesize=64 ways=8) - Core L#2 (P#2) - PU L#2 (P#2) - L2Cache L#3 (size=256KB linesize=64 ways=8) - L1dCache L#3 (size=32KB linesize=64 ways=8) - L1iCache L#3 (size=32KB linesize=64 ways=8) - Core L#3 (P#3) - PU L#3 (P#3) - L2Cache L#4 (size=256KB linesize=64 ways=8) - L1dCache L#4 (size=32KB linesize=64 ways=8) - L1iCache L#4 (size=32KB linesize=64 ways=8) - Core L#4 (P#4) - PU L#4 (P#4) - L2Cache L#5 (size=256KB linesize=64 ways=8) - L1dCache L#5 (size=32KB linesize=64 ways=8) - L1iCache L#5 (size=32KB linesize=64 ways=8) - Core L#5 (P#5) - PU L#5 (P#5) - L2Cache L#6 (size=256KB linesize=64 ways=8) - L1dCache L#6 (size=32KB linesize=64 ways=8) - L1iCache L#6 (size=32KB linesize=64 ways=8) - Core L#6 (P#6) - PU L#6 (P#6) - L3Cache L#1 (total=16777216KB size=17920KB linesize=64 ways=20) - NUMANode L#1 (P#1 local=16777216KB total=16777216KB) - L2Cache L#7 (size=256KB linesize=64 ways=8) - L1dCache L#7 (size=32KB linesize=64 ways=8) - L1iCache L#7 (size=32KB linesize=64 
ways=8) - Core L#7 (P#8) - PU L#7 (P#7) - L2Cache L#8 (size=256KB linesize=64 ways=8) - L1dCache L#8 (size=32KB linesize=64 ways=8) - L1iCache L#8 (size=32KB linesize=64 ways=8) - Core L#8 (P#9) - PU L#8 (P#8) - L2Cache L#9 (size=256KB linesize=64 ways=8) - L1dCache L#9 (size=32KB linesize=64 ways=8) - L1iCache L#9 (size=32KB linesize=64 ways=8) - Core L#9 (P#10) - PU L#9 (P#9) - L2Cache L#10 (size=256KB linesize=64 ways=8) - L1dCache L#10 (size=32KB linesize=64 ways=8) - L1iCache L#10 (size=32KB linesize=64 ways=8) - Core L#10 (P#11) - PU L#10 (P#10) - L2Cache L#11 (size=256KB linesize=64 ways=8) - L1dCache L#11 (size=32KB linesize=64 ways=8) - L1iCache L#11 (size=32KB linesize=64 ways=8) - Core L#11 (P#12) - PU L#11 (P#11) - L2Cache L#12 (size=256KB linesize=64 ways=8) - L1dCache L#12 (size=32KB linesize=64 ways=8) - L1iCache L#12 (size=32KB linesize=64 ways=8) - Core L#12 (P#13) - PU L#12 (P#12) - L2Cache L#13 (size=256KB linesize=64 ways=8) - L1dCache L#13 (size=32KB linesize=64 ways=8) - L1iCache L#13 (size=32KB linesize=64 ways=8) - Core L#13 (P#14) - PU L#13 (P#13) - Package L#1 (P#1 total=33554432KB CPUVendor=GenuineIntel CPUFamilyNumber=6 CPUModelNumber=63 CPUModel="Intel(R) Xeon(R) CPU E5-2683 v3 @ 2.00GHz" CPUStepping=2) - L3Cache L#2 (total=16777216KB size=17920KB linesize=64 ways=20) - NUMANode L#2 (P#2 local=16777216KB total=16777216KB) - L2Cache L#14 (size=256KB linesize=64 ways=8) - L1dCache L#14 (size=32KB linesize=64 ways=8) - L1iCache L#14 (size=32KB linesize=64 ways=8) - Core L#14 (P#0) - PU L#14 (P#14) - L2Cache L#15 (size=256KB linesize=64 ways=8) - L1dCache L#15 (size=32KB linesize=64 ways=8) - L1iCache L#15 (size=32KB linesize=64 ways=8) - Core L#15 (P#1) - PU L#15 (P#15) - L2Cache L#16 (size=256KB linesize=64 ways=8) - L1dCache L#16 (size=32KB linesize=64 ways=8) - L1iCache L#16 (size=32KB linesize=64 ways=8) - Core L#16 (P#2) - PU L#16 (P#16) - L2Cache L#17 (size=256KB linesize=64 ways=8) - L1dCache L#17 (size=32KB linesize=64 ways=8) - 
L1iCache L#17 (size=32KB linesize=64 ways=8) - Core L#17 (P#3) - PU L#17 (P#17) - L2Cache L#18 (size=256KB linesize=64 ways=8) - L1dCache L#18 (size=32KB linesize=64 ways=8) - L1iCache L#18 (size=32KB linesize=64 ways=8) - Core L#18 (P#4) - PU L#18 (P#18) - L2Cache L#19 (size=256KB linesize=64 ways=8) - L1dCache L#19 (size=32KB linesize=64 ways=8) - L1iCache L#19 (size=32KB linesize=64 ways=8) - Core L#19 (P#5) - PU L#19 (P#19) - L2Cache L#20 (size=256KB linesize=64 ways=8) - L1dCache L#20 (size=32KB linesize=64 ways=8) - L1iCache L#20 (size=32KB linesize=64 ways=8) - Core L#20 (P#6) - PU L#20 (P#20) - L3Cache L#3 (total=16777216KB size=17920KB linesize=64 ways=20) - NUMANode L#3 (P#3 local=16777216KB total=16777216KB) - L2Cache L#21 (size=256KB linesize=64 ways=8) - L1dCache L#21 (size=32KB linesize=64 ways=8) - L1iCache L#21 (size=32KB linesize=64 ways=8) - Core L#21 (P#8) - PU L#21 (P#21) - L2Cache L#22 (size=256KB linesize=64 ways=8) - L1dCache L#22 (size=32KB linesize=64 ways=8) - L1iCache L#22 (size=32KB linesize=64 ways=8) - Core L#22 (P#9) - PU L#22 (P#22) - L2Cache L#23 (size=256KB linesize=64 ways=8) - L1dCache L#23 (size=32KB linesize=64 ways=8) - L1iCache L#23 (size=32KB linesize=64 ways=8) - Core L#23 (P#10) - PU L#23 (P#23) - L2Cache L#24 (size=256KB linesize=64 ways=8) - L1dCache L#24 (size=32KB linesize=64 ways=8) - L1iCache L#24 (size=32KB linesize=64 ways=8) - Core L#24 (P#11) - PU L#24 (P#24) - L2Cache L#25 (size=256KB linesize=64 ways=8) - L1dCache L#25 (size=32KB linesize=64 ways=8) - L1iCache L#25 (size=32KB linesize=64 ways=8) - Core L#25 (P#12) - PU L#25 (P#25) - L2Cache L#26 (size=256KB linesize=64 ways=8) - L1dCache L#26 (size=32KB linesize=64 ways=8) - L1iCache L#26 (size=32KB linesize=64 ways=8) - Core L#26 (P#13) - PU L#26 (P#26) - L2Cache L#27 (size=256KB linesize=64 ways=8) - L1dCache L#27 (size=32KB linesize=64 ways=8) - L1iCache L#27 (size=32KB linesize=64 ways=8) - Core L#27 (P#14) - PU L#27 (P#27) -depth 0: 1 Machine (type #0) - 
depth 1: 2 Package (type #1) - depth 2: 4 L3Cache (type #6) - depth 3: 28 L2Cache (type #5) - depth 4: 28 L1dCache (type #4) - depth 5: 28 L1iCache (type #9) - depth 6: 28 Core (type #2) - depth 7: 28 PU (type #3) -Special depth -3: 4 NUMANode (type #13) -Relative latency matrix (kind 5) between 4 NUMANodes (depth -3) by logical indexes: - index 0 1 2 3 - 0 10 21 31 31 - 1 21 10 31 31 - 2 31 31 10 21 - 3 31 31 21 10 -Topology not from this system diff --git a/tests/hwloc/linux/28em64t-2s2n7c-buggycoresiblings.tar.bz2 b/tests/hwloc/linux/28em64t-2s2n7c-buggycoresiblings.tar.bz2 deleted file mode 100644 index 29183feae1..0000000000 Binary files a/tests/hwloc/linux/28em64t-2s2n7c-buggycoresiblings.tar.bz2 and /dev/null differ diff --git a/tests/hwloc/linux/2amd64-2n.output b/tests/hwloc/linux/2amd64-2n.output index 6a985b4957..20617c8a34 100644 --- a/tests/hwloc/linux/2amd64-2n.output +++ b/tests/hwloc/linux/2amd64-2n.output @@ -21,7 +21,7 @@ depth 0: 1 Machine (type #0) depth 5: 2 Core (type #2) depth 6: 2 PU (type #3) Special depth -3: 2 NUMANode (type #13) -Relative latency matrix (kind 5) between 2 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 2 NUMANodes (depth -3) by logical indexes: index 0 1 0 10 20 1 20 10 diff --git a/tests/hwloc/linux/32amd64-4s2n4c-cgroup.output b/tests/hwloc/linux/32amd64-4s2n4c-cgroup.output index 3e6f30fad5..0ae83d4e7f 100644 --- a/tests/hwloc/linux/32amd64-4s2n4c-cgroup.output +++ b/tests/hwloc/linux/32amd64-4s2n4c-cgroup.output @@ -53,7 +53,7 @@ depth 0: 1 Machine (type #0) depth 6: 6 Core (type #2) depth 7: 6 PU (type #3) Special depth -3: 6 NUMANode (type #13) -Relative latency matrix (kind 5) between 6 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 6 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 0 10 16 16 22 16 22 1 16 10 22 16 22 16 diff --git a/tests/hwloc/linux/32amd64-4s2n4c-cgroup.xml.output 
b/tests/hwloc/linux/32amd64-4s2n4c-cgroup.xml.output index 0d0c006e77..1b42de65e2 100644 --- a/tests/hwloc/linux/32amd64-4s2n4c-cgroup.xml.output +++ b/tests/hwloc/linux/32amd64-4s2n4c-cgroup.xml.output @@ -100,7 +100,7 @@ - + 0 1 2 3 4 5 10 16 16 22 16 22 16 10 22 16 22 16 16 22 10 16 16 22 22 16 diff --git a/tests/hwloc/linux/32em64t-2n8c+1mic.output b/tests/hwloc/linux/32em64t-2n8c+1mic.output index 64487e4fa6..440ce53fbb 100644 --- a/tests/hwloc/linux/32em64t-2n8c+1mic.output +++ b/tests/hwloc/linux/32em64t-2n8c+1mic.output @@ -291,13 +291,6 @@ - - - - - - - @@ -501,7 +494,7 @@ - + 0 1 10 21 21 10 diff --git a/tests/hwloc/linux/40intel64-2g2n4c+pci.output b/tests/hwloc/linux/40intel64-2g2n4c+pci.output index f5c9a12d19..646dd46b6c 100644 --- a/tests/hwloc/linux/40intel64-2g2n4c+pci.output +++ b/tests/hwloc/linux/40intel64-2g2n4c+pci.output @@ -506,7 +506,7 @@ - + 0 1 2 3 10 20 20 20 20 10 20 20 20 20 10 20 20 20 20 10 diff --git a/tests/hwloc/linux/40intel64-4n10c+pci-conflicts.output b/tests/hwloc/linux/40intel64-4n10c+pci-conflicts.output index 0b4e2351cc..f708dfbfbd 100644 --- a/tests/hwloc/linux/40intel64-4n10c+pci-conflicts.output +++ b/tests/hwloc/linux/40intel64-4n10c+pci-conflicts.output @@ -225,7 +225,7 @@ Special depth -3: 4 NUMANode (type #13) Special depth -4: 2 Bridge (type #14) Special depth -5: 1 PCIDev (type #15) Special depth -6: 1 OSDev (type #16) -Relative latency matrix (kind 5) between 4 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 4 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 0 10 20 20 20 1 20 10 20 20 diff --git a/tests/hwloc/linux/48amd64-4d2n6c-sparse.output b/tests/hwloc/linux/48amd64-4d2n6c-sparse.output index 90d8b2ae44..d3751e78c1 100644 --- a/tests/hwloc/linux/48amd64-4d2n6c-sparse.output +++ b/tests/hwloc/linux/48amd64-4d2n6c-sparse.output @@ -268,7 +268,7 @@ depth 0: 1 Machine (type #0) depth 6: 48 Core (type #2) depth 7: 48 PU (type #3) Special depth -3: 8 
NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 16 16 22 16 22 16 22 1 16 10 22 16 16 22 22 16 diff --git a/tests/hwloc/linux/64amd64-4s2n4ca2co.output b/tests/hwloc/linux/64amd64-4s2n4ca2co.output index 02a2d0472e..ccfc845bbc 100644 --- a/tests/hwloc/linux/64amd64-4s2n4ca2co.output +++ b/tests/hwloc/linux/64amd64-4s2n4ca2co.output @@ -284,7 +284,7 @@ depth 0: 1 Machine (type #0) depth 6: 64 Core (type #2) depth 7: 64 PU (type #3) Special depth -3: 8 NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 16 16 22 16 22 16 22 1 16 10 22 16 16 22 22 16 diff --git a/tests/hwloc/linux/8amd64-4n2c.output b/tests/hwloc/linux/8amd64-4n2c.output index 375cfa75e3..b3c7b4cf39 100644 --- a/tests/hwloc/linux/8amd64-4n2c.output +++ b/tests/hwloc/linux/8amd64-4n2c.output @@ -55,7 +55,7 @@ depth 0: 1 Machine (type #0) depth 5: 8 Core (type #2) depth 6: 8 PU (type #3) Special depth -3: 4 NUMANode (type #13) -Relative latency matrix (kind 5) between 4 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 4 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 0 10 20 20 20 1 20 10 20 20 diff --git a/tests/hwloc/linux/8ia64-2n2s2c.output b/tests/hwloc/linux/8ia64-2n2s2c.output index b38fa607a4..885de6db44 100644 --- a/tests/hwloc/linux/8ia64-2n2s2c.output +++ b/tests/hwloc/linux/8ia64-2n2s2c.output @@ -74,7 +74,7 @@ depth 0: 1 Machine (type #0) depth 8: 8 Core (type #2) depth 9: 8 PU (type #3) Special depth -3: 2 NUMANode (type #13) -Relative latency matrix (kind 5) between 2 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 2 
NUMANodes (depth -3) by logical indexes: index 0 1 0 10 15 1 15 10 diff --git a/tests/hwloc/linux/96em64t-4n4d3ca2co.output b/tests/hwloc/linux/96em64t-4n4d3ca2co.output index a49dcbcff3..8c86623b19 100644 --- a/tests/hwloc/linux/96em64t-4n4d3ca2co.output +++ b/tests/hwloc/linux/96em64t-4n4d3ca2co.output @@ -481,7 +481,7 @@ depth 0: 1 Machine (type #0) depth 7: 96 Core (type #2) depth 8: 96 PU (type #3) Special depth -3: 4 NUMANode (type #13) -Relative latency matrix (kind 5) between 4 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 4 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 0 10 26 26 26 1 26 10 26 26 diff --git a/tests/hwloc/linux/Makefile.am b/tests/hwloc/linux/Makefile.am index 13a4f59416..d074d0de55 100644 --- a/tests/hwloc/linux/Makefile.am +++ b/tests/hwloc/linux/Makefile.am @@ -19,7 +19,6 @@ sysfs_outputs = \ 4fake-4gr1nu1pu.output \ 256ppc-8n8s4t.output \ 256ppc-8n8s4t-nocache.output \ - 28em64t-2s2n7c-buggycoresiblings.output \ 32amd64-4s2n4c-cgroup.output \ 32amd64-4s2n4c-cgroup.xml.output \ 8amd64-4n2c.output \ @@ -63,6 +62,7 @@ sysfs_outputs = \ nvidiagpunumanodes.output \ nvidiagpunumanodes.kept.output \ fakememinitiators-1npc+1npc.output \ + fakecpuid1f-64intel64-2p4d2n2c2t.output \ fakeheteronuma.output # Each output `xyz.output' must have a corresponding tarball `xyz.tar.bz2' @@ -79,7 +79,6 @@ sysfs_tarballs = \ 4fake-4gr1nu1pu.tar.bz2 \ 256ppc-8n8s4t.tar.bz2 \ 256ppc-8n8s4t-nocache.source \ - 28em64t-2s2n7c-buggycoresiblings.tar.bz2 \ 32amd64-4s2n4c-cgroup.tar.bz2 \ 32amd64-4s2n4c-cgroup.xml.source \ 8amd64-4n2c.tar.bz2 \ @@ -123,6 +122,7 @@ sysfs_tarballs = \ nvidiagpunumanodes.tar.bz2 \ nvidiagpunumanodes.kept.source \ fakememinitiators-1npc+1npc.tar.bz2 \ + fakecpuid1f-64intel64-2p4d2n2c2t.tar.bz2 \ fakeheteronuma.tar.bz2 # Each output `xyz.output' may have a corresponding exclude `xyz.exclude' diff --git a/tests/hwloc/linux/fakecpuid1f-64intel64-2p4d2n2c2t.output 
b/tests/hwloc/linux/fakecpuid1f-64intel64-2p4d2n2c2t.output new file mode 100644 index 0000000000..dc8665c8c6 --- /dev/null +++ b/tests/hwloc/linux/fakecpuid1f-64intel64-2p4d2n2c2t.output @@ -0,0 +1,339 @@ +Machine (P#0 total=4025476KB DMIProductName="Standard PC (i440FX + PIIX, 1996)" DMIProductVersion=pc-i440fx-3.1 DMIChassisVendor=QEMU DMIChassisType=1 DMIChassisVersion=pc-i440fx-3.1 DMIChassisAssetTag= DMIBIOSVendor=SeaBIOS DMIBIOSVersion=1.12.0-1 DMIBIOSDate=04/01/2014 DMISysVendor=QEMU Backend=Linux LinuxCgroup=/ OSName=Linux OSRelease=5.0.0-rc7 OSVersion="#1 SMP Tue Feb 26 08:48:46 CET 2019" HostName=debian Architecture=x86_64) + Package L#0 (P#0 total=1028812KB CPUVendor=GenuineIntel CPUFamilyNumber=6 CPUModelNumber=6 CPUModel="QEMU Virtual CPU version 2.5+" CPUStepping=3) + Die L#0 (P#0 total=514156KB) + L3Cache L#0 (total=514156KB size=16384KB linesize=64 ways=16) + Group0 L#0 (total=256828KB) + NUMANode L#0 (P#0 local=256828KB total=256828KB) + L2Cache L#0 (size=4096KB linesize=64 ways=16) + Core L#0 (P#0) + L1dCache L#0 (size=32KB linesize=64 ways=8) + L1iCache L#0 (size=32KB linesize=64 ways=8) + PU L#0 (P#0) + L1dCache L#1 (size=32KB linesize=64 ways=8) + L1iCache L#1 (size=32KB linesize=64 ways=8) + PU L#1 (P#1) + L2Cache L#1 (size=4096KB linesize=64 ways=16) + Core L#1 (P#1) + L1dCache L#2 (size=32KB linesize=64 ways=8) + L1iCache L#2 (size=32KB linesize=64 ways=8) + PU L#2 (P#2) + L1dCache L#3 (size=32KB linesize=64 ways=8) + L1iCache L#3 (size=32KB linesize=64 ways=8) + PU L#3 (P#3) + Group0 L#1 (total=257328KB) + NUMANode L#1 (P#1 local=257328KB total=257328KB) + L2Cache L#2 (size=4096KB linesize=64 ways=16) + Core L#2 (P#2) + L1dCache L#4 (size=32KB linesize=64 ways=8) + L1iCache L#4 (size=32KB linesize=64 ways=8) + PU L#4 (P#4) + L1dCache L#5 (size=32KB linesize=64 ways=8) + L1iCache L#5 (size=32KB linesize=64 ways=8) + PU L#5 (P#5) + L2Cache L#3 (size=4096KB linesize=64 ways=16) + Core L#3 (P#3) + L1dCache L#6 (size=32KB linesize=64 ways=8) + 
L1iCache L#6 (size=32KB linesize=64 ways=8) + PU L#6 (P#6) + L1dCache L#7 (size=32KB linesize=64 ways=8) + L1iCache L#7 (size=32KB linesize=64 ways=8) + PU L#7 (P#7) + Die L#1 (P#1 total=514656KB) + L3Cache L#1 (total=514656KB size=16384KB linesize=64 ways=16) + Group0 L#2 (total=257328KB) + NUMANode L#2 (P#2 local=257328KB total=257328KB) + L2Cache L#4 (size=4096KB linesize=64 ways=16) + Core L#4 (P#0) + L1dCache L#8 (size=32KB linesize=64 ways=8) + L1iCache L#8 (size=32KB linesize=64 ways=8) + PU L#8 (P#8) + L1dCache L#9 (size=32KB linesize=64 ways=8) + L1iCache L#9 (size=32KB linesize=64 ways=8) + PU L#9 (P#9) + L2Cache L#5 (size=4096KB linesize=64 ways=16) + Core L#5 (P#1) + L1dCache L#10 (size=32KB linesize=64 ways=8) + L1iCache L#10 (size=32KB linesize=64 ways=8) + PU L#10 (P#10) + L1dCache L#11 (size=32KB linesize=64 ways=8) + L1iCache L#11 (size=32KB linesize=64 ways=8) + PU L#11 (P#11) + Group0 L#3 (total=257328KB) + NUMANode L#3 (P#3 local=257328KB total=257328KB) + L2Cache L#6 (size=4096KB linesize=64 ways=16) + Core L#6 (P#2) + L1dCache L#12 (size=32KB linesize=64 ways=8) + L1iCache L#12 (size=32KB linesize=64 ways=8) + PU L#12 (P#12) + L1dCache L#13 (size=32KB linesize=64 ways=8) + L1iCache L#13 (size=32KB linesize=64 ways=8) + PU L#13 (P#13) + L2Cache L#7 (size=4096KB linesize=64 ways=16) + Core L#7 (P#3) + L1dCache L#14 (size=32KB linesize=64 ways=8) + L1iCache L#14 (size=32KB linesize=64 ways=8) + PU L#14 (P#14) + L1dCache L#15 (size=32KB linesize=64 ways=8) + L1iCache L#15 (size=32KB linesize=64 ways=8) + PU L#15 (P#15) + Package L#1 (P#1 total=1004832KB CPUVendor=GenuineIntel CPUFamilyNumber=6 CPUModelNumber=6 CPUModel="QEMU Virtual CPU version 2.5+" CPUStepping=3) + Die L#2 (P#0 total=514656KB) + L3Cache L#2 (total=514656KB size=16384KB linesize=64 ways=16) + Group0 L#4 (total=257328KB) + NUMANode L#4 (P#4 local=257328KB total=257328KB) + L2Cache L#8 (size=4096KB linesize=64 ways=16) + Core L#8 (P#0) + L1dCache L#16 (size=32KB linesize=64 ways=8) 
+ L1iCache L#16 (size=32KB linesize=64 ways=8) + PU L#16 (P#16) + L1dCache L#17 (size=32KB linesize=64 ways=8) + L1iCache L#17 (size=32KB linesize=64 ways=8) + PU L#17 (P#17) + L2Cache L#9 (size=4096KB linesize=64 ways=16) + Core L#9 (P#1) + L1dCache L#18 (size=32KB linesize=64 ways=8) + L1iCache L#18 (size=32KB linesize=64 ways=8) + PU L#18 (P#18) + L1dCache L#19 (size=32KB linesize=64 ways=8) + L1iCache L#19 (size=32KB linesize=64 ways=8) + PU L#19 (P#19) + Group0 L#5 (total=257328KB) + NUMANode L#5 (P#5 local=257328KB total=257328KB) + L2Cache L#10 (size=4096KB linesize=64 ways=16) + Core L#10 (P#2) + L1dCache L#20 (size=32KB linesize=64 ways=8) + L1iCache L#20 (size=32KB linesize=64 ways=8) + PU L#20 (P#20) + L1dCache L#21 (size=32KB linesize=64 ways=8) + L1iCache L#21 (size=32KB linesize=64 ways=8) + PU L#21 (P#21) + L2Cache L#11 (size=4096KB linesize=64 ways=16) + Core L#11 (P#3) + L1dCache L#22 (size=32KB linesize=64 ways=8) + L1iCache L#22 (size=32KB linesize=64 ways=8) + PU L#22 (P#22) + L1dCache L#23 (size=32KB linesize=64 ways=8) + L1iCache L#23 (size=32KB linesize=64 ways=8) + PU L#23 (P#23) + Die L#3 (P#1 total=490176KB) + L3Cache L#3 (total=490176KB size=16384KB linesize=64 ways=16) + Group0 L#6 (total=257328KB) + NUMANode L#6 (P#6 local=257328KB total=257328KB) + L2Cache L#12 (size=4096KB linesize=64 ways=16) + Core L#12 (P#0) + L1dCache L#24 (size=32KB linesize=64 ways=8) + L1iCache L#24 (size=32KB linesize=64 ways=8) + PU L#24 (P#24) + L1dCache L#25 (size=32KB linesize=64 ways=8) + L1iCache L#25 (size=32KB linesize=64 ways=8) + PU L#25 (P#25) + L2Cache L#13 (size=4096KB linesize=64 ways=16) + Core L#13 (P#1) + L1dCache L#26 (size=32KB linesize=64 ways=8) + L1iCache L#26 (size=32KB linesize=64 ways=8) + PU L#26 (P#26) + L1dCache L#27 (size=32KB linesize=64 ways=8) + L1iCache L#27 (size=32KB linesize=64 ways=8) + PU L#27 (P#27) + Group0 L#7 (total=232848KB) + NUMANode L#7 (P#7 local=232848KB total=232848KB) + L2Cache L#14 (size=4096KB linesize=64 
ways=16) + Core L#14 (P#2) + L1dCache L#28 (size=32KB linesize=64 ways=8) + L1iCache L#28 (size=32KB linesize=64 ways=8) + PU L#28 (P#28) + L1dCache L#29 (size=32KB linesize=64 ways=8) + L1iCache L#29 (size=32KB linesize=64 ways=8) + PU L#29 (P#29) + L2Cache L#15 (size=4096KB linesize=64 ways=16) + Core L#15 (P#3) + L1dCache L#30 (size=32KB linesize=64 ways=8) + L1iCache L#30 (size=32KB linesize=64 ways=8) + PU L#30 (P#30) + L1dCache L#31 (size=32KB linesize=64 ways=8) + L1iCache L#31 (size=32KB linesize=64 ways=8) + PU L#31 (P#31) + Package L#2 (P#2 total=963512KB CPUVendor=GenuineIntel CPUFamilyNumber=6 CPUModelNumber=6 CPUModel="QEMU Virtual CPU version 2.5+" CPUStepping=3) + Die L#4 (P#0 total=514656KB) + L3Cache L#4 (total=514656KB size=16384KB linesize=64 ways=16) + Group0 L#8 (total=257328KB) + NUMANode L#8 (P#8 local=257328KB total=257328KB) + L2Cache L#16 (size=4096KB linesize=64 ways=16) + Core L#16 (P#0) + L1dCache L#32 (size=32KB linesize=64 ways=8) + L1iCache L#32 (size=32KB linesize=64 ways=8) + PU L#32 (P#32) + L1dCache L#33 (size=32KB linesize=64 ways=8) + L1iCache L#33 (size=32KB linesize=64 ways=8) + PU L#33 (P#33) + L2Cache L#17 (size=4096KB linesize=64 ways=16) + Core L#17 (P#1) + L1dCache L#34 (size=32KB linesize=64 ways=8) + L1iCache L#34 (size=32KB linesize=64 ways=8) + PU L#34 (P#34) + L1dCache L#35 (size=32KB linesize=64 ways=8) + L1iCache L#35 (size=32KB linesize=64 ways=8) + PU L#35 (P#35) + Group0 L#9 (total=257328KB) + NUMANode L#9 (P#9 local=257328KB total=257328KB) + L2Cache L#18 (size=4096KB linesize=64 ways=16) + Core L#18 (P#2) + L1dCache L#36 (size=32KB linesize=64 ways=8) + L1iCache L#36 (size=32KB linesize=64 ways=8) + PU L#36 (P#36) + L1dCache L#37 (size=32KB linesize=64 ways=8) + L1iCache L#37 (size=32KB linesize=64 ways=8) + PU L#37 (P#37) + L2Cache L#19 (size=4096KB linesize=64 ways=16) + Core L#19 (P#3) + L1dCache L#38 (size=32KB linesize=64 ways=8) + L1iCache L#38 (size=32KB linesize=64 ways=8) + PU L#38 (P#38) + L1dCache 
L#39 (size=32KB linesize=64 ways=8) + L1iCache L#39 (size=32KB linesize=64 ways=8) + PU L#39 (P#39) + Die L#5 (P#1 total=448856KB) + L3Cache L#5 (total=448856KB size=16384KB linesize=64 ways=16) + Group0 L#10 (total=257328KB) + NUMANode L#10 (P#10 local=257328KB total=257328KB) + L2Cache L#20 (size=4096KB linesize=64 ways=16) + Core L#20 (P#0) + L1dCache L#40 (size=32KB linesize=64 ways=8) + L1iCache L#40 (size=32KB linesize=64 ways=8) + PU L#40 (P#40) + L1dCache L#41 (size=32KB linesize=64 ways=8) + L1iCache L#41 (size=32KB linesize=64 ways=8) + PU L#41 (P#41) + L2Cache L#21 (size=4096KB linesize=64 ways=16) + Core L#21 (P#1) + L1dCache L#42 (size=32KB linesize=64 ways=8) + L1iCache L#42 (size=32KB linesize=64 ways=8) + PU L#42 (P#42) + L1dCache L#43 (size=32KB linesize=64 ways=8) + L1iCache L#43 (size=32KB linesize=64 ways=8) + PU L#43 (P#43) + Group0 L#11 (total=191528KB) + NUMANode L#11 (P#11 local=191528KB total=191528KB) + L2Cache L#22 (size=4096KB linesize=64 ways=16) + Core L#22 (P#2) + L1dCache L#44 (size=32KB linesize=64 ways=8) + L1iCache L#44 (size=32KB linesize=64 ways=8) + PU L#44 (P#44) + L1dCache L#45 (size=32KB linesize=64 ways=8) + L1iCache L#45 (size=32KB linesize=64 ways=8) + PU L#45 (P#45) + L2Cache L#23 (size=4096KB linesize=64 ways=16) + Core L#23 (P#3) + L1dCache L#46 (size=32KB linesize=64 ways=8) + L1iCache L#46 (size=32KB linesize=64 ways=8) + PU L#46 (P#46) + L1dCache L#47 (size=32KB linesize=64 ways=8) + L1iCache L#47 (size=32KB linesize=64 ways=8) + PU L#47 (P#47) + Package L#3 (P#3 total=1028320KB CPUVendor=GenuineIntel CPUFamilyNumber=6 CPUModelNumber=6 CPUModel="QEMU Virtual CPU version 2.5+" CPUStepping=3) + Die L#6 (P#0 total=514656KB) + L3Cache L#6 (total=514656KB size=16384KB linesize=64 ways=16) + Group0 L#12 (total=257328KB) + NUMANode L#12 (P#12 local=257328KB total=257328KB) + L2Cache L#24 (size=4096KB linesize=64 ways=16) + Core L#24 (P#0) + L1dCache L#48 (size=32KB linesize=64 ways=8) + L1iCache L#48 (size=32KB linesize=64 
ways=8) + PU L#48 (P#48) + L1dCache L#49 (size=32KB linesize=64 ways=8) + L1iCache L#49 (size=32KB linesize=64 ways=8) + PU L#49 (P#49) + L2Cache L#25 (size=4096KB linesize=64 ways=16) + Core L#25 (P#1) + L1dCache L#50 (size=32KB linesize=64 ways=8) + L1iCache L#50 (size=32KB linesize=64 ways=8) + PU L#50 (P#50) + L1dCache L#51 (size=32KB linesize=64 ways=8) + L1iCache L#51 (size=32KB linesize=64 ways=8) + PU L#51 (P#51) + Group0 L#13 (total=257328KB) + NUMANode L#13 (P#13 local=257328KB total=257328KB) + L2Cache L#26 (size=4096KB linesize=64 ways=16) + Core L#26 (P#2) + L1dCache L#52 (size=32KB linesize=64 ways=8) + L1iCache L#52 (size=32KB linesize=64 ways=8) + PU L#52 (P#52) + L1dCache L#53 (size=32KB linesize=64 ways=8) + L1iCache L#53 (size=32KB linesize=64 ways=8) + PU L#53 (P#53) + L2Cache L#27 (size=4096KB linesize=64 ways=16) + Core L#27 (P#3) + L1dCache L#54 (size=32KB linesize=64 ways=8) + L1iCache L#54 (size=32KB linesize=64 ways=8) + PU L#54 (P#54) + L1dCache L#55 (size=32KB linesize=64 ways=8) + L1iCache L#55 (size=32KB linesize=64 ways=8) + PU L#55 (P#55) + Die L#7 (P#1 total=513664KB) + L3Cache L#7 (total=513664KB size=16384KB linesize=64 ways=16) + Group0 L#14 (total=257328KB) + NUMANode L#14 (P#14 local=257328KB total=257328KB) + L2Cache L#28 (size=4096KB linesize=64 ways=16) + Core L#28 (P#0) + L1dCache L#56 (size=32KB linesize=64 ways=8) + L1iCache L#56 (size=32KB linesize=64 ways=8) + PU L#56 (P#56) + L1dCache L#57 (size=32KB linesize=64 ways=8) + L1iCache L#57 (size=32KB linesize=64 ways=8) + PU L#57 (P#57) + L2Cache L#29 (size=4096KB linesize=64 ways=16) + Core L#29 (P#1) + L1dCache L#58 (size=32KB linesize=64 ways=8) + L1iCache L#58 (size=32KB linesize=64 ways=8) + PU L#58 (P#58) + L1dCache L#59 (size=32KB linesize=64 ways=8) + L1iCache L#59 (size=32KB linesize=64 ways=8) + PU L#59 (P#59) + Group0 L#15 (total=256336KB) + NUMANode L#15 (P#15 local=256336KB total=256336KB) + L2Cache L#30 (size=4096KB linesize=64 ways=16) + Core L#30 (P#2) + 
L1dCache L#60 (size=32KB linesize=64 ways=8) + L1iCache L#60 (size=32KB linesize=64 ways=8) + PU L#60 (P#60) + L1dCache L#61 (size=32KB linesize=64 ways=8) + L1iCache L#61 (size=32KB linesize=64 ways=8) + PU L#61 (P#61) + L2Cache L#31 (size=4096KB linesize=64 ways=16) + Core L#31 (P#3) + L1dCache L#62 (size=32KB linesize=64 ways=8) + L1iCache L#62 (size=32KB linesize=64 ways=8) + PU L#62 (P#62) + L1dCache L#63 (size=32KB linesize=64 ways=8) + L1iCache L#63 (size=32KB linesize=64 ways=8) + PU L#63 (P#63) +depth 0: 1 Machine (type #0) + depth 1: 4 Package (type #1) + depth 2: 8 Die (type #19) + depth 3: 8 L3Cache (type #6) + depth 4: 16 Group0 (type #12) + depth 5: 32 L2Cache (type #5) + depth 6: 32 Core (type #2) + depth 7: 64 L1dCache (type #4) + depth 8: 64 L1iCache (type #9) + depth 9: 64 PU (type #3) +Special depth -3: 16 NUMANode (type #13) +Relative latency matrix (name NUMALatency kind 5) between 16 NUMANodes (depth -3) by logical indexes: + index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 0 10 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 1 20 10 20 20 20 20 20 20 20 20 20 20 20 20 20 20 + 2 20 20 10 20 20 20 20 20 20 20 20 20 20 20 20 20 + 3 20 20 20 10 20 20 20 20 20 20 20 20 20 20 20 20 + 4 20 20 20 20 10 20 20 20 20 20 20 20 20 20 20 20 + 5 20 20 20 20 20 10 20 20 20 20 20 20 20 20 20 20 + 6 20 20 20 20 20 20 10 20 20 20 20 20 20 20 20 20 + 7 20 20 20 20 20 20 20 10 20 20 20 20 20 20 20 20 + 8 20 20 20 20 20 20 20 20 10 20 20 20 20 20 20 20 + 9 20 20 20 20 20 20 20 20 20 10 20 20 20 20 20 20 + 10 20 20 20 20 20 20 20 20 20 20 10 20 20 20 20 20 + 11 20 20 20 20 20 20 20 20 20 20 20 10 20 20 20 20 + 12 20 20 20 20 20 20 20 20 20 20 20 20 10 20 20 20 + 13 20 20 20 20 20 20 20 20 20 20 20 20 20 10 20 20 + 14 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 20 + 15 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 10 +Topology not from this system diff --git a/tests/hwloc/linux/fakecpuid1f-64intel64-2p4d2n2c2t.tar.bz2 
b/tests/hwloc/linux/fakecpuid1f-64intel64-2p4d2n2c2t.tar.bz2 new file mode 100644 index 0000000000..e8b74b1c53 Binary files /dev/null and b/tests/hwloc/linux/fakecpuid1f-64intel64-2p4d2n2c2t.tar.bz2 differ diff --git a/tests/hwloc/linux/fakeheteronuma.output b/tests/hwloc/linux/fakeheteronuma.output index 263314fded..5e83c99deb 100644 --- a/tests/hwloc/linux/fakeheteronuma.output +++ b/tests/hwloc/linux/fakeheteronuma.output @@ -84,7 +84,7 @@ - + 0 1 2 3 4 5 10 20 30 40 40 50 20 10 30 40 40 50 30 30 10 40 40 50 40 40 diff --git a/tests/hwloc/linux/fakememinitiators-1npc+1npc.output b/tests/hwloc/linux/fakememinitiators-1npc+1npc.output index 55bd7a9594..736c791a4f 100644 --- a/tests/hwloc/linux/fakememinitiators-1npc+1npc.output +++ b/tests/hwloc/linux/fakememinitiators-1npc+1npc.output @@ -41,7 +41,7 @@ depth 0: 1 Machine (type #0) depth 7: 4 PU (type #3) Special depth -3: 6 NUMANode (type #13) Special depth -8: 2 MemCache (type #18) -Relative latency matrix (kind 5) between 6 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 6 NUMANodes (depth -3) by logical indexes: index 0 1 3 4 2 5 0 10 20 40 40 30 50 1 20 10 40 40 30 50 diff --git a/tests/hwloc/linux/nvidiagpunumanodes.kept.output b/tests/hwloc/linux/nvidiagpunumanodes.kept.output index b1e3fc9f5b..c7a46056e1 100644 --- a/tests/hwloc/linux/nvidiagpunumanodes.kept.output +++ b/tests/hwloc/linux/nvidiagpunumanodes.kept.output @@ -82,7 +82,7 @@ depth 0: 1 Machine (type #0) depth 6: 8 Core (type #2) depth 7: 32 PU (type #3) Special depth -3: 8 NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 4 5 6 7 1 2 3 0 10 40 80 80 80 80 80 80 4 40 10 80 80 80 80 80 80 diff --git a/tests/hwloc/linux/nvidiagpunumanodes.output b/tests/hwloc/linux/nvidiagpunumanodes.output index 2f52d1724b..10203e950a 100644 --- 
a/tests/hwloc/linux/nvidiagpunumanodes.output +++ b/tests/hwloc/linux/nvidiagpunumanodes.output @@ -76,7 +76,7 @@ depth 0: 1 Machine (type #0) depth 6: 8 Core (type #2) depth 7: 32 PU (type #3) Special depth -3: 2 NUMANode (type #13) -Relative latency matrix (kind 5) between 2 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 2 NUMANodes (depth -3) by logical indexes: index 0 1 0 10 40 1 40 10 diff --git a/tests/hwloc/linux/test-topology.sh.in b/tests/hwloc/linux/test-topology.sh.in index 44e8a7033d..3a4fc7b4ba 100644 --- a/tests/hwloc/linux/test-topology.sh.in +++ b/tests/hwloc/linux/test-topology.sh.in @@ -25,6 +25,9 @@ export HWLOC_DONT_ADD_VERSION_INFO HWLOC_DEBUG_SORT_CHILDREN=1 export HWLOC_DEBUG_SORT_CHILDREN +HWLOC_COMPONENTS=linux,stop +export HWLOC_COMPONENTS + actual_output="$1" # make sure we use default numeric formats (only XML outputs are dis-localized when supported) diff --git a/tests/hwloc/rename/main.c b/tests/hwloc/rename/main.c index ec773eeb2d..b0a301a7d6 100644 --- a/tests/hwloc/rename/main.c +++ b/tests/hwloc/rename/main.c @@ -40,7 +40,6 @@ #include "hwloc/nvml.h" #endif #include "hwloc/gl.h" -#include "hwloc/intel-mic.h" #include "private/components.h" #include "private/internal-components.h" diff --git a/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.output b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.output new file mode 100644 index 0000000000..424676116a --- /dev/null +++ b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.output @@ -0,0 +1,697 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 1 2 3 4 5 6 7 + 10 16 16 22 16 22 16 22 16 10 + 22 16 16 22 22 16 16 22 10 16 + 16 16 16 16 22 16 16 10 16 16 + 22 22 16 16 16 16 10 16 16 22 + 22 22 16 16 16 10 22 16 16 22 + 16 22 16 22 10 16 22 16 16 22 + 22 16 16 10 + + diff --git a/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.tar.bz2 b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.tar.bz2 new file mode 100644 index 0000000000..ce4a500918 Binary files /dev/null and b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.tar.bz2 differ diff --git a/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.env b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.env new file mode 100644 index 0000000000..cefaae9a83 --- /dev/null +++ b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.env @@ -0,0 +1,2 @@ +HWLOC_X86_TOPOEXT_NUMANODES=1 +export HWLOC_X86_TOPOEXT_NUMANODES diff --git a/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.output b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.output new file mode 100644 index 0000000000..424676116a --- /dev/null +++ b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.output @@ -0,0 
+1,697 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 1 2 3 4 5 6 7 + 10 16 16 22 16 22 16 22 16 10 + 22 16 16 22 22 16 16 22 10 16 + 16 16 16 16 22 16 16 10 16 16 + 22 22 16 16 16 16 10 16 16 22 + 22 22 16 16 16 10 22 16 16 22 + 16 22 16 22 10 16 22 16 16 22 + 22 16 16 10 + + diff --git a/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.source b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.source new file mode 100644 index 0000000000..c0b7d6b692 --- /dev/null +++ b/tests/hwloc/x86+linux/64amd64-4p2n4ca2co.topoextnuma.source @@ -0,0 +1 @@ +64amd64-4p2n4ca2co.tar.bz2 diff --git a/tests/hwloc/x86+linux/Makefile.am 
b/tests/hwloc/x86+linux/Makefile.am new file mode 100644 index 0000000000..6d567c7888 --- /dev/null +++ b/tests/hwloc/x86+linux/Makefile.am @@ -0,0 +1,33 @@ +# Copyright © 2019 Inria. All rights reserved. +# See COPYING in top-level directory. + +AM_CFLAGS = $(HWLOC_CFLAGS) +AM_CPPFLAGS = $(HWLOC_CPPFLAGS) +AM_LDFLAGS = $(HWLOC_LDFLAGS) + +# Add your expected output file here. +# By default, it shows the output of `lstopo - -v'. +cpuid_sysfs_outputs = \ + 64amd64-4p2n4ca2co.output \ + 64amd64-4p2n4ca2co.topoextnuma.output + +# Each output `xyz.output' must have a corresponding tarball `xyz.tar.bz2' +# or a corresponding `xyz.source' specifying which tarball to use +cpuid_sysfs_tarballs = \ + 64amd64-4p2n4ca2co.tar.bz2 \ + 64amd64-4p2n4ca2co.topoextnuma.source + +# Each output `xyz.output' may have a corresponding `xyz.env' +# modifying the environment of lstopo +cpuid_sysfs_envs = \ + 64amd64-4p2n4ca2co.topoextnuma.env + +if HWLOC_HAVE_OPENAT +if HWLOC_HAVE_BUNZIPP +TESTS = $(cpuid_sysfs_outputs) +endif HWLOC_HAVE_BUNZIPP +endif HWLOC_HAVE_OPENAT + +EXTRA_DIST = $(cpuid_sysfs_outputs) $(cpuid_sysfs_tarballs) $(cpuid_sysfs_envs) + +LOG_COMPILER = $(builddir)/test-topology.sh diff --git a/tests/hwloc/x86+linux/test-topology.sh.in b/tests/hwloc/x86+linux/test-topology.sh.in new file mode 100644 index 0000000000..18bb2c0cd5 --- /dev/null +++ b/tests/hwloc/x86+linux/test-topology.sh.in @@ -0,0 +1,142 @@ +#!@BASH@ +#-*-sh-*- + +# +# Copyright © 2015-2019 Inria. All rights reserved. +# See COPYING in top-level directory. 
+# + +HWLOC_top_srcdir="@HWLOC_top_srcdir@" +HWLOC_top_builddir="@HWLOC_top_builddir@" +lstopo="$HWLOC_top_builddir/utils/lstopo/lstopo-no-graphics" + +HWLOC_PLUGINS_PATH=${HWLOC_top_builddir}/hwloc +export HWLOC_PLUGINS_PATH + +HWLOC_DONT_ADD_VERSION_INFO=1 +export HWLOC_DONT_ADD_VERSION_INFO + +actual_output="$1" + +# make sure we use default numeric formats +LANG=C +LC_ALL=C +export LANG LC_ALL + +error() +{ + echo $@ 2>&1 +} + +# test_topology NAME TOPOLOGY-DIR +# +# Test the topology under TOPOLOGY-DIR. Return true on success. +test_topology () +{ + local name="$1" + local dir="$2" + local expected_output="$3" + local options="$4" + + local output="`mktemp`" + + export HWLOC_THISSYSTEM=0 + export HWLOC_DEBUG_CHECK=1 + export HWLOC_COMPONENTS=x86,linux,stop + export HWLOC_FSROOT="$dir/fsroot" + export HWLOC_CPUID_PATH="$dir/cpuid" + + opts="--of xml -" + [ -r "$options" ] && opts=`cat $options` + + if ! "$lstopo" $opts \ + | sed -e 's/ gp_index="[0-9]*"//' \ + > "$output" + # filtered gp_index because it may change if we reorder discovery + # (not used in NUMA/PU distances, and regenerated (differently) during XML import). + then + result=1 + else + if [ "$HWLOC_UPDATE_TEST_TOPOLOGY_OUTPUT" != 1 ] + then + @DIFF@ -b @HWLOC_DIFF_U@ "$expected_output" "$output" + result=$? + else + if ! @DIFF@ "$expected_output" "$output" >/dev/null + then + cp -f "$output" "$expected_output" + echo "Updated $expected_output" + fi + result=0 + fi + fi + + rm "$output" + + return $result +} + +# test_eligible TOPOLOGY-DIR +# +# Return true if the topology under TOPOLOGY-DIR is eligible for +# testing with the current flavor. +test_eligible() +{ + local dir="$1" + local output="$2" + + [ -d "$dir" -a -f "$output" ] +} + + +if [ ! -x "$lstopo" ] +then + error "Could not find executable file \`$lstopo'." 
+ exit 1 +fi + +topology="${actual_output%.output}" +if [ "$topology" = "$actual_output" ] ; +then + error "Input file \`$1' should end with .output" + exit 1 +fi +actual_options="$topology".options + +# if there's a .source file, use the tarball name it contains instead of $topology +if [ -f "$topology".source ] ; then + actual_source="$HWLOC_top_srcdir"/tests/hwloc/x86+linux/`cat "$topology".source` +else + actual_source="$topology".tar.bz2 +fi + +# if there's a .env file, source it +if [ -f "$topology".env ] ; then + source "$topology".env +fi + +result=1 + +dir="`mktemp -d`" + +if ! ( bunzip2 -c "$actual_source" | ( cd "$dir" && tar xf - $tar_options ) ) +then + error "failed to extract x86 cpuid \`$topology'" +else + actual_dir="`echo "$dir"/*`" + + if test_eligible "$actual_dir" "$actual_output" + then + test_count="`expr $test_count + 1`" + + test_topology "`basename $topology`" "$actual_dir" "$actual_output" "$actual_options" + result=$? + else + # Skip this test. + result=77 + fi +fi + +rm -rf "$dir" + +exit $result diff --git a/tests/hwloc/x86/Intel-CPUID.1F-Qemu-2p3d3c2t.options b/tests/hwloc/x86/Intel-CPUID.1F-Qemu-2p3d3c2t.options deleted file mode 100644 index 005971c08e..0000000000 --- a/tests/hwloc/x86/Intel-CPUID.1F-Qemu-2p3d3c2t.options +++ /dev/null @@ -1 +0,0 @@ ---filter l3cache:none diff --git a/tests/hwloc/x86/Intel-CPUID.1F-Qemu-2p3d3c2t.output b/tests/hwloc/x86/Intel-CPUID.1F-Qemu-2p3d3c2t.output index bbef44a52c..01ed30f2e3 100644 --- a/tests/hwloc/x86/Intel-CPUID.1F-Qemu-2p3d3c2t.output +++ b/tests/hwloc/x86/Intel-CPUID.1F-Qemu-2p3d3c2t.output @@ -1,64 +1,394 @@ -Machine - NUMANode L#0 (P#0) - Package L#0 - Group0(Die) L#0 - L2 L#0 (4096KB) + Core L#0 - L1d L#0 (32KB) + L1i L#0 (32KB) + PU L#0 (P#0) - L1d L#1 (32KB) + L1i L#1 (32KB) + PU L#1 (P#1) - L2 L#1 (4096KB) + Core L#1 - L1d L#2 (32KB) + L1i L#2 (32KB) + PU L#2 (P#2) - L1d L#3 (32KB) + L1i L#3 (32KB) + PU L#3 (P#3) - L2 L#2 (4096KB) + Core L#2 - L1d L#4 (32KB) + L1i L#4 (32KB) + PU 
L#4 (P#4) - L1d L#5 (32KB) + L1i L#5 (32KB) + PU L#5 (P#5) - Group0(Die) L#1 - L2 L#3 (4096KB) + Core L#3 - L1d L#6 (32KB) + L1i L#6 (32KB) + PU L#6 (P#6) - L1d L#7 (32KB) + L1i L#7 (32KB) + PU L#7 (P#7) - L2 L#4 (4096KB) + Core L#4 - L1d L#8 (32KB) + L1i L#8 (32KB) + PU L#8 (P#8) - L1d L#9 (32KB) + L1i L#9 (32KB) + PU L#9 (P#9) - L2 L#5 (4096KB) + Core L#5 - L1d L#10 (32KB) + L1i L#10 (32KB) + PU L#10 (P#10) - L1d L#11 (32KB) + L1i L#11 (32KB) + PU L#11 (P#11) - Group0(Die) L#2 - L2 L#6 (4096KB) + Core L#6 - L1d L#12 (32KB) + L1i L#12 (32KB) + PU L#12 (P#12) - L1d L#13 (32KB) + L1i L#13 (32KB) + PU L#13 (P#13) - L2 L#7 (4096KB) + Core L#7 - L1d L#14 (32KB) + L1i L#14 (32KB) + PU L#14 (P#14) - L1d L#15 (32KB) + L1i L#15 (32KB) + PU L#15 (P#15) - L2 L#8 (4096KB) + Core L#8 - L1d L#16 (32KB) + L1i L#16 (32KB) + PU L#16 (P#16) - L1d L#17 (32KB) + L1i L#17 (32KB) + PU L#17 (P#17) - Package L#1 - Group0(Die) L#3 - L2 L#9 (4096KB) + Core L#9 - L1d L#18 (32KB) + L1i L#18 (32KB) + PU L#18 (P#18) - L1d L#19 (32KB) + L1i L#19 (32KB) + PU L#19 (P#19) - L2 L#10 (4096KB) + Core L#10 - L1d L#20 (32KB) + L1i L#20 (32KB) + PU L#20 (P#20) - L1d L#21 (32KB) + L1i L#21 (32KB) + PU L#21 (P#21) - L2 L#11 (4096KB) + Core L#11 - L1d L#22 (32KB) + L1i L#22 (32KB) + PU L#22 (P#22) - L1d L#23 (32KB) + L1i L#23 (32KB) + PU L#23 (P#23) - Group0(Die) L#4 - L2 L#12 (4096KB) + Core L#12 - L1d L#24 (32KB) + L1i L#24 (32KB) + PU L#24 (P#24) - L1d L#25 (32KB) + L1i L#25 (32KB) + PU L#25 (P#25) - L2 L#13 (4096KB) + Core L#13 - L1d L#26 (32KB) + L1i L#26 (32KB) + PU L#26 (P#26) - L1d L#27 (32KB) + L1i L#27 (32KB) + PU L#27 (P#27) - L2 L#14 (4096KB) + Core L#14 - L1d L#28 (32KB) + L1i L#28 (32KB) + PU L#28 (P#28) - L1d L#29 (32KB) + L1i L#29 (32KB) + PU L#29 (P#29) - Group0(Die) L#5 - L2 L#15 (4096KB) + Core L#15 - L1d L#30 (32KB) + L1i L#30 (32KB) + PU L#30 (P#30) - L1d L#31 (32KB) + L1i L#31 (32KB) + PU L#31 (P#31) - L2 L#16 (4096KB) + Core L#16 - L1d L#32 (32KB) + L1i L#32 (32KB) + PU L#32 (P#32) - 
L1d L#33 (32KB) + L1i L#33 (32KB) + PU L#33 (P#33) - L2 L#17 (4096KB) + Core L#17 - L1d L#34 (32KB) + L1i L#34 (32KB) + PU L#34 (P#34) - L1d L#35 (32KB) + L1i L#35 (32KB) + PU L#35 (P#35) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/hwloc/x86/Intel-CascadeLake-2xXeon6230.output b/tests/hwloc/x86/Intel-CascadeLake-2xXeon6230.output new file mode 100644 index 0000000000..f679d28bb3 --- /dev/null +++ b/tests/hwloc/x86/Intel-CascadeLake-2xXeon6230.output @@ -0,0 +1,548 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/hwloc/x86/Intel-CascadeLake-2xXeon6230.tar.bz2 b/tests/hwloc/x86/Intel-CascadeLake-2xXeon6230.tar.bz2 new file mode 100644 index 0000000000..a44baaf2bd Binary files /dev/null and b/tests/hwloc/x86/Intel-CascadeLake-2xXeon6230.tar.bz2 differ diff --git a/tests/hwloc/x86/Makefile.am b/tests/hwloc/x86/Makefile.am index b355b3561a..760f37eb15 100644 --- a/tests/hwloc/x86/Makefile.am +++ b/tests/hwloc/x86/Makefile.am @@ -7,8 +7,12 @@ AM_LDFLAGS = $(HWLOC_LDFLAGS) # Add your expected output file here. # By default, it shows the output of `lstopo --of xml -'. +# +# XML outputs shouldn't have gp_index attributes to avoid spurious errors. +# Remove them with: sed -r -e 's/ gp_index=\"[0-9]+\"//' -i foo.outpu cpuid_outputs = \ Intel-CPUID.1F-Qemu-2p3d3c2t.output \ + Intel-CascadeLake-2xXeon6230.output \ Intel-Skylake-2xXeon6140.output \ Intel-Broadwell-2xXeon-E5-2650Lv4.output \ Intel-Haswell-2xXeon-E5-2680v3.output \ @@ -37,6 +41,7 @@ cpuid_outputs = \ # specifying which tarball to use cpuid_tarballs = \ Intel-CPUID.1F-Qemu-2p3d3c2t.tar.bz2 \ + Intel-CascadeLake-2xXeon6230.tar.bz2 \ Intel-Skylake-2xXeon6140.tar.bz2 \ Intel-Broadwell-2xXeon-E5-2650Lv4.tar.bz2 \ Intel-Haswell-2xXeon-E5-2680v3.tar.bz2 \ @@ -61,12 +66,11 @@ cpuid_tarballs = \ # Each output `xyz.output' may have a corresponding `xyz.options' # file modifying the behavior of lstopo -cpuid_options = \ - Intel-CPUID.1F-Qemu-2p3d3c2t.options +cpuid_options = # Each output `xyz.output' may have a corresponding `xyz.env' # modifying the environment of lstopo -cpuid_envs = +cpuid_envs = if HWLOC_HAVE_BUNZIPP TESTS = $(cpuid_outputs) diff --git 
a/tests/hwloc/xml/16amd64-4distances.console.output b/tests/hwloc/xml/16amd64-4distances.console.output index ceecda9a4a..b47294143e 100644 --- a/tests/hwloc/xml/16amd64-4distances.console.output +++ b/tests/hwloc/xml/16amd64-4distances.console.output @@ -106,7 +106,7 @@ depth 0: 1 Machine (type #0) depth 6: 16 Core (type #2) depth 7: 16 PU (type #3) Special depth -3: 8 NUMANode (type #13) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 20 20 20 20 20 20 20 1 20 10 20 20 20 20 20 20 @@ -116,7 +116,7 @@ Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical index 5 20 20 20 20 20 10 20 20 6 20 20 20 20 20 20 10 20 7 20 20 20 20 20 20 20 10 -Relative latency matrix (kind 5) between 8 Packages (depth 2) by logical indexes: +Relative latency matrix (name (null) kind 5) between 8 Packages (depth 2) by logical indexes: index 1 0 3 6 4 7 2 5 1 10 20 40 40 80 80 80 80 0 20 10 40 40 80 80 80 80 @@ -126,13 +126,13 @@ Relative latency matrix (kind 5) between 8 Packages (depth 2) by logical indexes 7 80 80 80 80 20 10 40 40 2 80 80 80 80 40 40 10 20 5 80 80 80 80 40 40 20 10 -Relative latency matrix (kind 5) between 4 PUs (depth 7) by logical indexes: +Relative latency matrix (name (null) kind 5) between 4 PUs (depth 7) by logical indexes: index 0 1 2 3 0 145 200 300 400 1 567 687 457 2300 2 450 12300 45 67 3 4600 345600 100 100 -Relative latency matrix (kind 5) between 8 Cores (depth 6) by logical indexes: +Relative latency matrix (name (null) kind 5) between 8 Cores (depth 6) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 20 40 40 80 80 80 80 1 20 10 40 40 80 80 80 80 diff --git a/tests/hwloc/xml/16amd64-4distances.xml b/tests/hwloc/xml/16amd64-4distances.xml index f8c16faca7..e05e4bb71c 100644 --- a/tests/hwloc/xml/16amd64-4distances.xml +++ b/tests/hwloc/xml/16amd64-4distances.xml @@ -223,7 
+223,7 @@ - + 1 0 2 5 4 3 6 7 10 20 20 20 20 20 20 20 20 10 20 20 20 20 20 20 20 20 10 20 diff --git a/tests/hwloc/xml/16amd64-8n2c-cpusets.xml b/tests/hwloc/xml/16amd64-8n2c-cpusets.xml index 17c8a91d8a..a29523de24 100644 --- a/tests/hwloc/xml/16amd64-8n2c-cpusets.xml +++ b/tests/hwloc/xml/16amd64-8n2c-cpusets.xml @@ -108,7 +108,7 @@ - + 1 2 3 4 5 10 20 20 20 20 20 10 20 20 20 20 20 10 20 20 20 20 20 10 20 diff --git a/tests/hwloc/xml/192em64t-12gr2n8c2t.xml b/tests/hwloc/xml/192em64t-12gr2n8c2t.xml index e572e01f76..4b5526a806 100644 --- a/tests/hwloc/xml/192em64t-12gr2n8c2t.xml +++ b/tests/hwloc/xml/192em64t-12gr2n8c2t.xml @@ -2325,7 +2325,7 @@ - + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 diff --git a/tests/hwloc/xml/192em64t-24n8c2t.xml b/tests/hwloc/xml/192em64t-24n8c2t.xml index cb2db577c6..720fc232a7 100644 --- a/tests/hwloc/xml/192em64t-24n8c2t.xml +++ b/tests/hwloc/xml/192em64t-24n8c2t.xml @@ -2301,7 +2301,7 @@ - + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 diff --git a/tests/hwloc/xml/24em64t-2n6c2t-pci.xml b/tests/hwloc/xml/24em64t-2n6c2t-pci.xml index 6d01f0fd2b..3876116b4b 100644 --- a/tests/hwloc/xml/24em64t-2n6c2t-pci.xml +++ b/tests/hwloc/xml/24em64t-2n6c2t-pci.xml @@ -209,7 +209,7 @@ - + 0 1 10 20 20 10 diff --git a/tests/hwloc/xml/32em64t-2n8c2t-pci-noio.xml b/tests/hwloc/xml/32em64t-2n8c2t-pci-noio.xml index a0843bda45..59e3a4a31e 100644 --- a/tests/hwloc/xml/32em64t-2n8c2t-pci-noio.xml +++ b/tests/hwloc/xml/32em64t-2n8c2t-pci-noio.xml @@ -171,7 +171,7 @@ - + 0 1 10 20 20 10 diff --git a/tests/hwloc/xml/32em64t-2n8c2t-pci-normalio.xml b/tests/hwloc/xml/32em64t-2n8c2t-pci-normalio.xml index 0ed2ecfd9d..f9f54148fc 100644 --- a/tests/hwloc/xml/32em64t-2n8c2t-pci-normalio.xml +++ b/tests/hwloc/xml/32em64t-2n8c2t-pci-normalio.xml @@ -280,7 +280,7 @@ - + 0 1 10 20 20 10 diff --git a/tests/hwloc/xml/32em64t-2n8c2t-pci-wholeio.xml b/tests/hwloc/xml/32em64t-2n8c2t-pci-wholeio.xml index 3133d36e2a..86baa56b85 100644 
--- a/tests/hwloc/xml/32em64t-2n8c2t-pci-wholeio.xml +++ b/tests/hwloc/xml/32em64t-2n8c2t-pci-wholeio.xml @@ -286,7 +286,7 @@ - + 0 1 10 20 20 10 diff --git a/tests/hwloc/xml/64intel64-3g2n+2n-irregulargroups+pci.console.output b/tests/hwloc/xml/64intel64-3g2n+2n-irregulargroups+pci.console.output index 44e1240107..6b42d49775 100644 --- a/tests/hwloc/xml/64intel64-3g2n+2n-irregulargroups+pci.console.output +++ b/tests/hwloc/xml/64intel64-3g2n+2n-irregulargroups+pci.console.output @@ -384,7 +384,7 @@ Special depth -3: 8 NUMANode (type #13) Special depth -4: 14 Bridge (type #14) Special depth -5: 8 PCIDev (type #15) Special depth -6: 3 OSDev (type #16) -Relative latency matrix (kind 5) between 8 NUMANodes (depth -3) by logical indexes: +Relative latency matrix (name NUMALatency kind 5) between 8 NUMANodes (depth -3) by logical indexes: index 0 1 2 3 4 5 6 7 0 10 50 65 65 65 65 65 65 1 50 10 65 65 65 65 65 65 diff --git a/tests/hwloc/xml/64intel64-3g2n+2n-irregulargroups+pci.xml b/tests/hwloc/xml/64intel64-3g2n+2n-irregulargroups+pci.xml index 3dd0ac5e5d..f693503086 100644 --- a/tests/hwloc/xml/64intel64-3g2n+2n-irregulargroups+pci.xml +++ b/tests/hwloc/xml/64intel64-3g2n+2n-irregulargroups+pci.xml @@ -797,7 +797,7 @@ - + 0 1 2 3 4 5 6 7 10 50 65 65 65 65 65 65 50 10 65 65 65 65 65 65 65 65 10 50 diff --git a/tests/hwloc/xml/96em64t-4n4d3ca2co-pci.xml b/tests/hwloc/xml/96em64t-4n4d3ca2co-pci.xml index 60a726a906..dc05780eb8 100644 --- a/tests/hwloc/xml/96em64t-4n4d3ca2co-pci.xml +++ b/tests/hwloc/xml/96em64t-4n4d3ca2co-pci.xml @@ -830,7 +830,7 @@ - + 0 1 2 3 10 26 26 26 26 10 26 26 26 26 10 26 26 26 26 10 diff --git a/tests/hwloc/xml/Makefile.am b/tests/hwloc/xml/Makefile.am index f701b60558..dd231b8694 100644 --- a/tests/hwloc/xml/Makefile.am +++ b/tests/hwloc/xml/Makefile.am @@ -31,6 +31,7 @@ xml_outputs = \ 96em64t-4n4d3ca2co-pci.xml \ 192em64t-12gr2n8c2t.xml \ 192em64t-24n8c2t.xml \ + fakeheterodistances.xml \ 8em64t-2p2ca2co-nonodesets.v1tov2.xml \ 
8ia64-2n2s2c+1n.v1tov2.xml \ 16amd64-4distances.v1tov2.xml \ diff --git a/tests/hwloc/xml/fakeheterodistances.xml b/tests/hwloc/xml/fakeheterodistances.xml new file mode 100644 index 0000000000..41560765f1 --- /dev/null +++ b/tests/hwloc/xml/fakeheterodistances.xml @@ -0,0 +1,119 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 1 2 3 4 5 6 7 + 10 16 16 22 16 22 16 22 16 10 + 22 16 16 22 22 16 16 22 10 16 + 16 16 16 16 22 16 16 10 16 16 + 22 22 16 16 16 16 10 16 16 22 + 22 22 16 16 16 10 22 16 16 22 + 16 22 16 22 10 16 22 16 16 22 + 22 16 16 10 + + + NUMANode:254 NUMANode:255 NUMANode:256 NUMANode:257 NUMANode:258 Group:500 Group:501 NUMANode:261 + 10 16 16 22 16 22 16 22 16 10 + 22 16 16 22 22 16 16 22 10 16 + 16 16 16 16 22 16 16 10 16 16 + 22 22 16 16 16 16 10 16 16 22 + 22 22 16 16 16 10 22 16 16 22 + 16 22 16 22 10 16 22 16 16 22 + 22 16 16 10 + + diff --git a/utils/hwloc/Makefile.am b/utils/hwloc/Makefile.am index 70278cc5ea..7011a5d77b 100644 --- a/utils/hwloc/Makefile.am +++ b/utils/hwloc/Makefile.am @@ -71,7 +71,6 @@ if HWLOC_HAVE_LINUX bin_SCRIPTS += hwloc-gather-topology endif HWLOC_HAVE_LINUX -if !HWLOC_HAVE_MINGW32 TESTS = \ test-hwloc-annotate.sh \ test-hwloc-calc.sh \ @@ -82,7 +81,6 @@ TESTS = \ if HWLOC_HAVE_PLUGINS TESTS += test-fake-plugin.sh endif HWLOC_HAVE_PLUGINS -endif !HWLOC_HAVE_MINGW32 SEDMAN = $(SED) -e 's/%PACKAGE_NAME%/@PACKAGE_NAME@/g' \ -e 's/%PACKAGE_VERSION%/@PACKAGE_VERSION@/g' \ @@ -163,7 +161,7 @@ install-exec-hook: $(SED) -e 's/HWLOC_top_builddir\/utils\/hwloc/bindir/' -e 's/HWLOC_top_builddir\/utils\/lstopo/bindir/' -e '/HWLOC_top_builddir/d' $(DESTDIR)$(bindir)/hwloc-compress-dir > $(DESTDIR)$(bindir)/hwloc-compress-dir.tmp && mv -f $(DESTDIR)$(bindir)/hwloc-compress-dir.tmp $(DESTDIR)$(bindir)/hwloc-compress-dir chmod +x $(DESTDIR)$(bindir)/hwloc-compress-dir if 
HWLOC_HAVE_LINUX - $(SED) -e 's/HWLOC_top_builddir\/utils\/lstopo/bindir/' -e '/HWLOC_top_builddir/d' $(DESTDIR)$(bindir)/hwloc-gather-topology > $(DESTDIR)$(bindir)/hwloc-gather-topology.tmp && mv -f $(DESTDIR)$(bindir)/hwloc-gather-topology.tmp $(DESTDIR)$(bindir)/hwloc-gather-topology + $(SED) -e 's/HWLOC_top_builddir\/utils\/lstopo/bindir/' -e 's/HWLOC_top_builddir\/utils\/hwloc/bindir/' -e '/HWLOC_top_builddir/d' $(DESTDIR)$(bindir)/hwloc-gather-topology > $(DESTDIR)$(bindir)/hwloc-gather-topology.tmp && mv -f $(DESTDIR)$(bindir)/hwloc-gather-topology.tmp $(DESTDIR)$(bindir)/hwloc-gather-topology chmod +x $(DESTDIR)$(bindir)/hwloc-gather-topology endif HWLOC_HAVE_LINUX diff --git a/utils/hwloc/hwloc-annotate.c b/utils/hwloc/hwloc-annotate.c index 9ed39bf70d..d9fed25339 100644 --- a/utils/hwloc/hwloc-annotate.c +++ b/utils/hwloc/hwloc-annotate.c @@ -252,7 +252,8 @@ int main(int argc, char *argv[]) hwloc_utils_check_api_version(callname); - putenv((char *) "HWLOC_XML_VERBOSE=1"); + if (!getenv("HWLOC_XML_VERBOSE")) + putenv((char *) "HWLOC_XML_VERBOSE=1"); while (argc && *argv[0] == '-') { if (!strcmp(argv[0], "--ci")) diff --git a/utils/hwloc/hwloc-bind.1in b/utils/hwloc/hwloc-bind.1in index 4427ff075f..59d8734df5 100644 --- a/utils/hwloc/hwloc-bind.1in +++ b/utils/hwloc/hwloc-bind.1in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright © 2009-2018 Inria. All rights reserved. +.\" Copyright © 2009-2019 Inria. All rights reserved. .\" Copyright © 2010 Université of Bordeaux .\" Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. .\" See COPYING in top-level directory. @@ -97,6 +97,9 @@ No location may be given since no binding is performed. \fB\-\-single\fR Bind on a single CPU to prevent migration. .TP +\fB\-\-no\-smt\fR +Only keep a single PU per core before binding. +.TP \fB\-\-strict\fR Require strict binding. 
.TP @@ -213,6 +216,11 @@ the second core of the first package: $ hwloc-bind package:1.core:0 package:0.core:1 -- echo hello +To bind on the first PU of all cores of the first package: + + $ hwloc-bind package:0.core:all.pu:0 -- echo hello + $ hwloc-bind --no-smt package:0 -- echo hello + To bind memory on the first high-bandwidth memory node: $ hwloc-bind --membind hbm:0 -- echo hello diff --git a/utils/hwloc/hwloc-bind.c b/utils/hwloc/hwloc-bind.c index d3aa03e018..b8ee2aae85 100644 --- a/utils/hwloc/hwloc-bind.c +++ b/utils/hwloc/hwloc-bind.c @@ -50,6 +50,7 @@ void usage(const char *name, FILE *where) #endif fprintf(where, " --taskset Use taskset-specific format when displaying cpuset strings\n"); fprintf(where, "Input topology options:\n"); + fprintf(where, " --no-smt Only keep a single PU per core\n"); fprintf(where, " --restrict Restrict the topology to processors listed in \n"); fprintf(where, " --disallowed Include objects disallowed by administrative limitations\n"); fprintf(where, " --hbm Only consider high bandwidth memory nodes\n"); @@ -76,6 +77,7 @@ int main(int argc, char *argv[]) int force = 0; int single = 0; int verbose = 0; + int no_smt = 0; int only_hbm = -1; int logical = 1; int taskset = 0; @@ -142,6 +144,10 @@ int main(int argc, char *argv[]) single = 1; goto next; } + if (!strcmp(argv[0], "--no-smt")) { + no_smt = 1; + goto next; + } if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) { force = 1; goto next; @@ -448,6 +454,11 @@ int main(int argc, char *argv[]) fprintf(stderr, "--mempolicy ignored unless memory binding is also requested with --membind.\n"); } + if (!got_cpubind && no_smt) { + hwloc_bitmap_copy(cpubind_set, hwloc_topology_get_topology_cpuset(topology)); + got_cpubind = 1; + } + if (got_cpubind) { if (hwloc_bitmap_iszero(cpubind_set)) { if (verbose >= 0) @@ -466,6 +477,23 @@ int main(int argc, char *argv[]) fprintf(stderr, "Conflicting CPU and memory binding requested, adding HWLOC_CPUBIND_NOMEMBIND flag.\n"); 
cpubind_flags |= HWLOC_CPUBIND_NOMEMBIND; } + if (no_smt) { + if (hwloc_get_type_depth(topology, HWLOC_OBJ_CORE) == HWLOC_TYPE_DEPTH_UNKNOWN) { + fprintf(stderr, "Topology has no Core object, ignoring --no-smt\n"); + } else { + hwloc_obj_t core = NULL; + while ((core = hwloc_get_next_obj_covering_cpuset_by_type(topology, cpubind_set, HWLOC_OBJ_CORE, core)) != NULL) { + int firstpu = hwloc_bitmap_first(core->cpuset); + int hadpu = hwloc_bitmap_isset(cpubind_set, firstpu); + assert(firstpu >= 0); + /* remove the entire core */ + hwloc_bitmap_andnot(cpubind_set, cpubind_set, core->cpuset); + /* put back its first PU if it was there */ + if (hadpu) + hwloc_bitmap_set(cpubind_set, firstpu); + } + } + } if (single) hwloc_bitmap_singlify(cpubind_set); if (pid_number > 0) diff --git a/utils/hwloc/hwloc-calc.1in b/utils/hwloc/hwloc-calc.1in index ef3f7e6200..bc54093d36 100644 --- a/utils/hwloc/hwloc-calc.1in +++ b/utils/hwloc/hwloc-calc.1in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright © 2010-2018 Inria. All rights reserved. +.\" Copyright © 2010-2019 Inria. All rights reserved. .\" Copyright © 2009 Cisco Systems, Inc. All rights reserved. .\" See COPYING in top-level directory. .TH HWLOC-CALC "1" "%HWLOC_DATE%" "%PACKAGE_VERSION%" "%PACKAGE_NAME%" @@ -93,6 +93,9 @@ while a comma is used to separate indexes \fB\-\-single\fR Singlify the output to a single CPU. .TP +\fB\-\-no\-smt\fR +Only keep a single PU per core in the input locations. +.TP \fB\-\-taskset\fR Display CPU set strings in the format recognized by the taskset command-line program instead of hwloc-specific CPU set string format. 
@@ -214,7 +217,7 @@ package: To bind GNU OpenMP threads logically over the whole machine, we need to use physical number output instead: - $ export GOMP_CPU_AFFINITY=`hwloc-calc --physical-output --intersect PU machine:0` + $ export GOMP_CPU_AFFINITY=`hwloc-calc --physical-output --intersect PU all` $ echo $GOMP_CPU_AFFINITY 0,4,1,5,2,6,3,7 @@ -271,14 +274,15 @@ To synthetize a set of cores into largest objects on a 2-node 2-package 2-core m To get the set of first threads of all cores: $ hwloc-calc core:all.pu:0 + $ hwloc-calc --no-smt all This can also be very useful in order to make GNU OpenMP use exactly one thread per core, and in logical core order: - $ export OMP_NUM_THREADS=`hwloc-calc --number-of core machine:0` + $ export OMP_NUM_THREADS=`hwloc-calc --number-of core all` $ echo $OMP_NUM_THREADS 4 - $ export GOMP_CPU_AFFINITY=`hwloc-calc --physical-output --intersect PU core:all.pu:0` + $ export GOMP_CPU_AFFINITY=`hwloc-calc --physical-output --intersect PU --no-smt all` $ echo $GOMP_CPU_AFFINITY 0,2,1,3 diff --git a/utils/hwloc/hwloc-calc.c b/utils/hwloc/hwloc-calc.c index bcbd71e19e..920e784fca 100644 --- a/utils/hwloc/hwloc-calc.c +++ b/utils/hwloc/hwloc-calc.c @@ -46,6 +46,7 @@ void usage(const char *callname __hwloc_attribute_unused, FILE *where) fprintf(where, " --taskset Use taskset-specific format when displaying cpuset strings\n"); fprintf(where, " --single Singlify the output to a single CPU\n"); fprintf(where, "Input topology options:\n"); + fprintf(where, " --no-smt Only keep a single PU per core\n"); fprintf(where, " --restrict Restrict the topology to processors listed in \n"); fprintf(where, " --disallowed Include objects disallowed by administrative limitations\n"); hwloc_utils_input_format_usage(where, 10); @@ -65,6 +66,7 @@ static int intersectdepth = -1; static int hiernblevels = 0; static int *hierdepth = NULL; static int showobjs = 0; +static int no_smt = 0; static int singlify = 0; static int taskset = 0; @@ -101,6 +103,24 @@ 
hwloc_calc_hierarch_output(hwloc_topology_t topology, const char *prefix, const static int hwloc_calc_output(hwloc_topology_t topology, const char *sep, hwloc_bitmap_t set) { + if (no_smt) { + if (hwloc_get_type_depth(topology, HWLOC_OBJ_CORE) == HWLOC_TYPE_DEPTH_UNKNOWN) { + fprintf(stderr, "Topology has no Core object, ignoring --no-smt\n"); + } else { + hwloc_obj_t core = NULL; + while ((core = hwloc_get_next_obj_covering_cpuset_by_type(topology, set, HWLOC_OBJ_CORE, core)) != NULL) { + int firstpu = hwloc_bitmap_first(core->cpuset); + int hadpu = hwloc_bitmap_isset(set, firstpu); + assert(firstpu >= 0); + /* remove the entire core */ + hwloc_bitmap_andnot(set, set, core->cpuset); + /* put back its first PU if it was there */ + if (hadpu) + hwloc_bitmap_set(set, firstpu); + } + } + } + if (singlify) hwloc_bitmap_singlify(set); @@ -221,8 +241,10 @@ int main(int argc, char *argv[]) hwloc_utils_check_api_version(callname); /* enable verbose backends */ - putenv((char *) "HWLOC_XML_VERBOSE=1"); - putenv((char *) "HWLOC_SYNTHETIC_VERBOSE=1"); + if (!getenv("HWLOC_XML_VERBOSE")) + putenv((char *) "HWLOC_XML_VERBOSE=1"); + if (!getenv("HWLOC_SYNTHETIC_VERBOSE")) + putenv((char *) "HWLOC_SYNTHETIC_VERBOSE=1"); set = hwloc_bitmap_alloc(); @@ -233,7 +255,7 @@ int main(int argc, char *argv[]) hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); \ hwloc_topology_set_flags(topology, flags); \ if (input) { \ - err = hwloc_utils_enable_input_format(topology, input, &input_format, verbose, callname); \ + err = hwloc_utils_enable_input_format(topology, flags, input, &input_format, verbose, callname); \ if (err) return EXIT_FAILURE; \ } \ err = hwloc_topology_load(topology); \ @@ -265,6 +287,10 @@ int main(int argc, char *argv[]) usage(callname, stdout); return EXIT_SUCCESS; } + if (!strcmp (argv[0], "--no-smt")) { + no_smt = 1; + goto next; + } if (!strcmp (argv[0], "--restrict")) { hwloc_bitmap_t restrictset; if (argc < 2) { diff --git 
a/utils/hwloc/hwloc-diff.c b/utils/hwloc/hwloc-diff.c index 072483c00c..61622044ba 100644 --- a/utils/hwloc/hwloc-diff.c +++ b/utils/hwloc/hwloc-diff.c @@ -35,7 +35,8 @@ int main(int argc, char *argv[]) hwloc_utils_check_api_version(callname); - putenv((char *) "HWLOC_XML_VERBOSE=1"); + if (!getenv("HWLOC_XML_VERBOSE")) + putenv((char *) "HWLOC_XML_VERBOSE=1"); while (argc && *argv[0] == '-') { if (!strcmp (argv[0], "--refname")) { diff --git a/utils/hwloc/hwloc-distrib.1in b/utils/hwloc/hwloc-distrib.1in index fa5bfa8ed7..62e5e416d0 100644 --- a/utils/hwloc/hwloc-distrib.1in +++ b/utils/hwloc/hwloc-distrib.1in @@ -4,15 +4,17 @@ .\" See COPYING in top-level directory. .TH HWLOC-DISTRIB "1" "%HWLOC_DATE%" "%PACKAGE_VERSION%" "%PACKAGE_NAME%" .SH NAME -hwloc-distrib \- Build a number of cpu masks distributed on the system +hwloc-distrib \- Build a number of cpu masks for a type of object, distributed on the system with a policy. . .\" ************************** .\" Synopsis Section .\" ************************** .SH SYNOPSIS -.B hwloc-distrib -[\fIoptions\fR] \fI\fR -. +.B hwloc-distrib \fIround-robin\fR \fI\fR [\fIoptions\fR] + +.B hwloc-distrib \fIscatter\fR \fI\fR [\fIoptions\fR] + +.B hwloc-distrib \fI:...:\fR [\fIoptions\fR] .\" ************************** .\" Options Section .\" ************************** @@ -25,6 +27,12 @@ Singlify each output to a single CPU. Show CPU set strings in the format recognized by the taskset command-line program instead of hwloc-specific CPU set string format. .TP +\fB\-\-logical-index\fR +Show logical index of distributed objects instead of CPU set format. +.TP +\fB\-\-physical-index\fR +Show OS index of distributed objects instead of CPU set format. +.TP \fB\-v\fR \fB\-\-verbose\fR Verbose messages. .TP @@ -55,29 +63,21 @@ Enforce the input in the given format, among \fBxml\fR, \fBfsroot\fR, \fB\-\-ignore\fR Ignore all objects of type in the topology. 
.TP -\fB\-\-from\fR -Distribute starting from objects of the given type instead of from -the top of the topology hierarchy, i.e. ignoring the structure given by objects -above. - - cannot be among NUMANode, I/O or Misc types. -.TP -\fB\-\-to\fR -Distribute down to objects of the given type instead of down to the bottom of -the topology hierarchy, i.e. ignoring the structure given by objects below. -This may be useful if some latitude is desired for the binding, e.g. just bind -several processes to each package without specifying a single core for each -of them. - - cannot be among NUMANode, I/O or Misc types. +\fB\-\-from\fR +Distribute starting from objects with this logical index. .TP -\fB\-\-at\fR -Distribute among objects of the given type. This is equivalent to specifying -both \fB\-\-from\fR and \fB\-\-to\fR at the same time. +\fB\-n\fR +Distribute this amount of objects. If integer is larger than the +number of objects to distribute, the distribution will cycle. .TP \fB\-\-reverse\fR -Distribute by starting with the last objects first, -and singlify CPU sets by keeping the last bit (instead of the first bit). +Reverse distribution order provided by the policy. +.TP +\fB\-\-shuffle\fR +Randomize distribution. +With round-robin policy, the whole distribution is randomized. +With scatter and custom policies, the policy is preserved but the index +inside levels is shuffled. .TP \fB\-\-restrict\fR Restrict the topology to the given cpuset. @@ -98,11 +98,13 @@ Display help message and exit. . hwloc-distrib generates a series of CPU masks corresponding to a distribution of a given number of elements over the topology of the machine. 
The distribution -is done recursively from the top of the hierarchy (or from the level specified -by option \fB\-\-from\fR) down to the bottom of the hierarchy (or down to the -level specified by option \fB\-\-to\fR, or until only one element remains), -splitting the number of elements at each encountered hierarchy level not ignored -by options \fB\-\-ignore\fR. +is done recursively from the last level argument to the first level argument. +Round-robin policy is equivalent to distribute from the leaves to the root. +Scatter policy is equivalent to distribute from the root to the leaves. +Leaves are the type of object output by hwloc-distrib. +Leaves of round-robin and scatter policies are set in argument. +Leaves are the deepest objects in the list of provided levels :...: +when using a custom policy. . .PP This can e.g. be used to distribute a set of processes hierarchically according @@ -122,45 +124,45 @@ hwloc(7) directly apply to the hwloc-bind utility. hwloc-distrib's operation is best described through several examples. . 
.PP -If 4 processes have to be distributed across a machine, their CPU masks -may be obtained with: - - $ hwloc-distrib 4 - 0x0000000f - 0x00000f00 - 0x000000f0 - 0x0000f000 +If 4 processes have to be distributed across a machine and bound to a single +package each, their CPU masks may be obtained with: + $ hwloc-distrib scatter package -n 4 + 0x00ff00ff + 0xff00ff00 + 0x00ff00ff + 0xff00ff00 + To distribute only among the second package, the topology should be restricted: - $ hwloc-distrib --restrict $(hwloc-calc package:1) 4 - 0x00000010 - 0x00000020 - 0x00000040 - 0x00000080 + $ hwloc-distrib scatter pu -n 4 --restrict $(hwloc-calc package:1) + 0xff00ff00 + 0xff00ff00 + 0xff00ff00 + 0xff00ff00 To get a single processor of each CPU masks (prevent migration in case of binding) - $ hwloc-distrib 4 --single + $ hwloc-distrib scatter pu -n 4 0x00000001 0x00000100 - 0x00000010 - 0x00001000 + 0x00000002 + 0x00000200 Each output line may be converted independently with hwloc-calc: - $ hwloc-distrib 4 --single | hwloc-calc --taskset + $ hwloc-distrib scatter pu -n 4 | hwloc-calc --taskset 0x1 0x100 - 0x10 - 0x1000 + 0x2 + 0x200 To convert the output into a list of processors that may be passed to dplace -c inside a mpirun command line: - $ hwloc-distrib 4 --single | xargs hwloc-calc --pulist - 0,8,4,16 + $ hwloc-distrib scatter pu -n 4 | xargs hwloc-calc --pulist + 0,2,16,18 . . 
.\" ************************** diff --git a/utils/hwloc/hwloc-distrib.c b/utils/hwloc/hwloc-distrib.c index 33b86274dd..eb8144b436 100644 --- a/utils/hwloc/hwloc-distrib.c +++ b/utils/hwloc/hwloc-distrib.c @@ -14,54 +14,161 @@ #include #endif -void usage(const char *callname __hwloc_attribute_unused, FILE *where) +char *callname; +hwloc_topology_t topology; + +void usage(const char *_callname __hwloc_attribute_unused, FILE *where) { - fprintf(where, "Usage: hwloc-distrib [options] number\n"); + fprintf(where, "Usage: hwloc-distrib round-robin [options]\n"); + fprintf(where, " hwloc-distrib scatter [options]\n"); + fprintf(where, " hwloc-distrib [options]\n"); + fprintf(where, "Distribution options:\n"); - fprintf(where, " --ignore Ignore objects of the given type\n"); - fprintf(where, " --from Distribute starting from objects of the given type\n"); - fprintf(where, " --to Distribute down to objects of the given type\n"); - fprintf(where, " --at Distribute among objects of the given type\n"); - fprintf(where, " --reverse Distribute by starting from last objects\n"); + fprintf(where, " --ignore Ignore objects of the given type\n"); + fprintf(where, " -n Distribute objects. 
Cycle if there is less than objects.\n"); + fprintf(where, " --reverse Distribute by starting from last objects\n"); + fprintf(where, " --shuffle Stick to distribution policy but shuffle indexes inside levels.\n"); + fprintf(where, " --from Logical index of the first object of type to distribute.\n"); + fprintf(where, "Input topology options:\n"); - fprintf(where, " --restrict Restrict the topology to processors listed in \n"); - fprintf(where, " --disallowed Include objects disallowed by administrative limitations\n"); + fprintf(where, " --restrict Restrict the topology to processors listed in \n"); + fprintf(where, " --disallowed Include objects disallowed by administrative limitations\n"); hwloc_utils_input_format_usage(where, 0); fprintf(where, "Formatting options:\n"); - fprintf(where, " --single Singlify each output to a single CPU\n"); - fprintf(where, " --taskset Show taskset-specific cpuset strings\n"); + fprintf(where, " --single Singlify each output to a single CPU\n"); + fprintf(where, " --taskset Show taskset-specific cpuset strings\n"); + fprintf(where, " --logical-index Show objects logical index\n"); + fprintf(where, " --physical-index Show objects os index\n"); fprintf(where, "Miscellaneous options:\n"); - fprintf(where, " -v --verbose Show verbose messages\n"); - fprintf(where, " --version Report version and exit\n"); + fprintf(where, " -v --verbose Show verbose messages\n"); + fprintf(where, " --version Report version and exit\n"); +} + +#define ROUND_ROBIN 0 +#define SCATTER 1 +#define CUSTOM 2 +char *arg_types; // argv containing types to parse +int policy; // policy among ROUND_ROBIN, SCATTER, CUSTOM. +hwloc_obj_type_t *policy_types = NULL; // resulting types after parsing arg_types +int num_types=1; // The number of parsed types in policy_types. 
+ +static hwloc_obj_type_t parse_policy_type(const char* type){ + int depth; + hwloc_obj_t obj; + + if (hwloc_type_sscanf_as_depth(type, NULL, topology, &depth) < 0) { + fprintf(stderr, "Unrecognized type `%s'.\n", type); + exit(EXIT_FAILURE); + } + if (depth < 0){ + fprintf(stderr, "Unsupported policy type `%s' with negative depth.\n", type); + exit(EXIT_FAILURE); + } + obj = hwloc_get_obj_by_depth(topology, depth, 0); + assert(obj != NULL); + + return obj->type; +} + +// Parse string in arg_types after topology create, load, filter etc... +static void parse_policy(void){ + size_t i; + char *type; + + if (policy == ROUND_ROBIN){ + num_types = 1; + policy_types = malloc(sizeof(*policy_types)); + *policy_types = parse_policy_type(arg_types); + } + else if (policy == SCATTER){ + num_types = 1; + policy_types = malloc(sizeof(*policy_types)); + *policy_types = parse_policy_type(arg_types); + } + else { + for(i=0; i 0 && next->logical_index != from_index ) {} + do { + if (logical_index) { + printf("%d\n", next->logical_index); + } else if (physical_index){ + printf("%d\n", next->os_index); + } else { + hwloc_bitmap_copy(cpuset, next->cpuset); + if (singlify) { + if (dflags & HWLOC_DISTRIB_FLAG_REVERSE) { + last = hwloc_bitmap_last(cpuset); + hwloc_bitmap_only(cpuset, last); + } else { + hwloc_bitmap_singlify(cpuset); + } + } + if (taskset) + hwloc_bitmap_taskset_asprintf(&str, cpuset); + else + hwloc_bitmap_asprintf(&str, cpuset); + printf("%s\n", str); + free(str); + } + if ((! 
continue_it && n < 0) || --n == 0) + break; + continue_it = hwloc_distrib_iterator_next(topology, it, &next); + } while (1); + hwloc_bitmap_free(cpuset); + hwloc_distrib_destroy_iterator(it); + free(policy_types); + hwloc_topology_destroy(topology); return EXIT_SUCCESS; diff --git a/utils/hwloc/hwloc-gather-cpuid.1in b/utils/hwloc/hwloc-gather-cpuid.1in index 4e7964ffd4..bdb28d5940 100644 --- a/utils/hwloc/hwloc-gather-cpuid.1in +++ b/utils/hwloc/hwloc-gather-cpuid.1in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright © 2015-2018 Inria. All rights reserved. +.\" Copyright © 2015-2019 Inria. All rights reserved. .\" See COPYING in top-level directory. .TH HWLOC-GATHER-CPUID "1" "%HWLOC_DATE%" "%PACKAGE_VERSION%" "%PACKAGE_NAME%" .SH NAME @@ -23,6 +23,9 @@ for later (possibly offline) usage Only gather cpuid values for logical processor whose OS/physical index is . .TP +\fB\-s\fR \fB\-\-silent\fR +Do not show verbose messages. +.TP \fB\-h\fR \fB\-\-help\fR Display help message and exit. . @@ -61,6 +64,11 @@ unless it is clear that they contain no sensitive information. . .PP .B NOTE: +The output of \fBhwloc-gather-cpuid\fR is included in the tarball +saved by \fBhwloc-gather-topology\fR on Linux/x86. +. +.PP +.B NOTE: It is highly recommended that you read the hwloc(7) overview page before reading this man page. . @@ -95,5 +103,5 @@ such as (but not limited to) failure to create the output files. .SH SEE ALSO . 
.ft R -hwloc(7), lstopo(1) +hwloc(7), hwloc-gather-topology(1), lstopo(1) .sp diff --git a/utils/hwloc/hwloc-gather-cpuid.c b/utils/hwloc/hwloc-gather-cpuid.c index dfb2d594d9..ea1d41d8a6 100644 --- a/utils/hwloc/hwloc-gather-cpuid.c +++ b/utils/hwloc/hwloc-gather-cpuid.c @@ -27,6 +27,8 @@ #endif #endif +static int verbose = 1; + static void dump_one_cpuid(FILE *output, unsigned *regs, unsigned inregmask) { unsigned i; @@ -67,10 +69,12 @@ static int dump_one_proc(hwloc_topology_t topo, hwloc_obj_t pu, const char *path fprintf(stderr, "Cannot open file '%s' for writing: %s\n", path, strerror(errno)); return -1; } - printf("Gathering CPUID of PU P#%u in path %s ...\n", pu->os_index, path); + if (verbose) + printf("Gathering CPUID of PU P#%u in path %s ...\n", pu->os_index, path); } else { output = stdout; - printf("Gathering CPUID of PU P#%u on stdout ...\n", pu->os_index); + if (verbose) + printf("Gathering CPUID of PU P#%u on stdout ...\n", pu->os_index); } fprintf(output, "# mask e[abcd]x => e[abcd]x\n"); @@ -423,8 +427,9 @@ void usage(const char *callname, FILE *where) fprintf(where, "Usage : %s [ options ] ... 
[ outdir ]\n", callname); fprintf(where, " outdir is an optional output directory instead of cpuid/\n"); fprintf(where, "Options:\n"); - fprintf(where, " -c Only gather for logical processor with logical index \n"); - fprintf(where, " -h --help Show this usage\n"); + fprintf(where, " -c Only gather for logical processor with logical index \n"); + fprintf(where, " -s --silent Do not show verbose messages\n"); + fprintf(where, " -h --help Show this usage\n"); } int main(int argc, const char * const argv[]) @@ -455,6 +460,10 @@ int main(int argc, const char * const argv[]) idx = atoi(argv[1]); argc -= 2; argv += 2; + } else if (argc >= 1 && (!strcmp(argv[0], "-s") || !strcmp(argv[0], "--silent"))) { + verbose--; + argc--; + argv++; } else if (!strcmp(argv[0], "-h") || !strcmp(argv[0], "--help")) { usage(callname, stdout); goto out; @@ -488,7 +497,8 @@ int main(int argc, const char * const argv[]) } if (!strcmp(basedir, "-")) { - printf("Gathering on stdout ...\n"); + if (verbose) + printf("Gathering on stdout ...\n"); if (idx == (unsigned) -1) { fprintf(stderr, "Cannot gather multiple PUs on stdout.\n"); ret = EXIT_FAILURE; @@ -505,7 +515,8 @@ int main(int argc, const char * const argv[]) goto out_with_topo; } } - printf("Gathering in directory %s ...\n", basedir); + if (verbose) + printf("Gathering in directory %s ...\n", basedir); pathlen = strlen(basedir) + 20; /* for '/pu%u' or '/hwloc-cpuid-info' */ path = malloc(pathlen); @@ -526,7 +537,8 @@ int main(int argc, const char * const argv[]) if (file) { fprintf(file, "Architecture: x86\n"); fclose(file); - printf("Summary written to %s\n", path); + if (verbose) + printf("Summary written to %s\n", path); } else { fprintf(stderr, "Failed to open summary file '%s' for writing: %s\n", path, strerror(errno)); } @@ -543,9 +555,10 @@ int main(int argc, const char * const argv[]) } } - printf("\n" - "WARNING: Do not post these files on a public list or website unless you\n" - "WARNING: are sure that no information about this 
platform is sensitive.\n"); + if (verbose) + printf("\n" + "WARNING: Do not post these files on a public list or website unless you\n" + "WARNING: are sure that no information about this platform is sensitive.\n"); out_with_path: free(path); diff --git a/utils/hwloc/hwloc-gather-topology.1in b/utils/hwloc/hwloc-gather-topology.1in index 377cd03fa3..5b21ec9973 100644 --- a/utils/hwloc/hwloc-gather-topology.1in +++ b/utils/hwloc/hwloc-gather-topology.1in @@ -1,10 +1,10 @@ .\" -*- nroff -*- .\" Copyright © 2010 Jirka Hladky -.\" Copyright © 2010-2018 Inria. All rights reserved. +.\" Copyright © 2010-2019 Inria. All rights reserved. .\" See COPYING in top-level directory. .TH HWLOC-GATHER-TOPOLOGY "1" "%HWLOC_DATE%" "%PACKAGE_VERSION%" "%PACKAGE_NAME%" .SH NAME -hwloc-gather-topology \- Saves the relevant Linux topology files +hwloc-gather-topology \- Saves the relevant Linux and x86 topology files and the lstopo textual and XML outputs for later (possibly offline) usage . .\" ************************** @@ -23,9 +23,16 @@ and the lstopo textual and XML outputs for later (possibly offline) usage \fB\-\-io\fR Also gather I/O related files. The gathering may be much slower, and the generated archive may be much bigger. +. +.TP \fB\-\-dmi\fR Also gather DMI/SMBIOS related files. The gathering requires root access, and the dmi-sysfs kernel module should be loaded. +. +.TP +\fB\-\-no\-cpuid\fR +Do not gather x86 CPUID dump using \fIhwloc\-gather\-cpuid\fR. +. .TP \fB\-h\fR \fB\-\-help\fR Display help message and exit. @@ -140,5 +147,5 @@ such as (but not limited to) failure to create the archive or output files. .SH SEE ALSO . 
.ft R -hwloc(7), lstopo(1), hwloc-calc(1), hwloc-distrib(1) +hwloc(7), lstopo(1), hwloc-gather-cpuid(1), hwloc-calc(1), hwloc-distrib(1) .sp diff --git a/utils/hwloc/hwloc-gather-topology.in b/utils/hwloc/hwloc-gather-topology.in index 960294a260..e38804b255 100644 --- a/utils/hwloc/hwloc-gather-topology.in +++ b/utils/hwloc/hwloc-gather-topology.in @@ -17,6 +17,7 @@ localstatedir="@localstatedir@" runstatedir="@HWLOC_runstatedir@" # this will be changed into $bindir/lstopo during make install lstopo="$HWLOC_top_builddir/utils/lstopo/lstopo-no-graphics" +hgcpuid="$HWLOC_top_builddir/utils/hwloc/hwloc-gather-cpuid" # make sure we use default numeric formats LANG=C @@ -26,6 +27,7 @@ export LANG LC_ALL # don't let ls append special chars after symlinks etc unalias -a ls +gathercpuid=1 gatherio=0 gatherdmi=0 keep=0 @@ -47,9 +49,10 @@ usage() echo " Saves the Linux topology files (/sys, /proc, ...) under .tar.bz2" echo " and the corresponding lstopo verbose output under .output" echo "Options:" - echo " --io Gather I/O files (takes much longer and generates much larger tarball)" - echo " --dmi Gather SMBIOS files. Works only when run as root. Requires dmi-sysfs kernel module" - echo " --keep Keep the temporary copy of dumped files" + echo " --io Gather I/O files (takes much longer and generates much larger tarball)" + echo " --dmi Gather SMBIOS files. Works only when run as root. 
Requires dmi-sysfs kernel module" + echo " --no-cpuid Do not gather x86 CPUID using hwloc-gather-cpuid" + echo " --keep Keep the temporary copy of dumped files" echo "Example:" echo " $0 /tmp/\$(uname -n)" } @@ -58,6 +61,7 @@ while [ x`echo "$1" | cut -c1` = x- ] ; do case $1 in --io) gatherio=1;; --dmi) gatherdmi=1;; + --no-cpuid) gathercpuid=0;; --keep) keep=1;; -h|--help) usage; exit 0;; *) echo "Unrecognized option: $1"; usage; exit 1;; @@ -307,6 +311,15 @@ export HWLOC_DUMP_NOFILE_INFO="$destdir/$basename/proc/hwloc-nofile-info" # disable HWLOC_DUMP_NOFILE_INFO for next lstopo invocation export HWLOC_DUMP_NOFILE_INFO= +# +# Export cpuid if available +# +if [ x$gathercpuid = x1 -a -e "$hgcpuid" ]; then + echo + echo "Exporting x86 CPUID using hwloc-gather-cpuid" + $hgcpuid --silent "$destdir/$basename/cpuid" +fi + # Create the archive and optionally keep the tree in /tmp for testing echo ( cd "$destdir/" && tar cfj "$basename.tar.bz2" "$basename" ) diff --git a/utils/hwloc/hwloc-info.c b/utils/hwloc/hwloc-info.c index 2f3b520c19..4abf6ef110 100644 --- a/utils/hwloc/hwloc-info.c +++ b/utils/hwloc/hwloc-info.c @@ -361,8 +361,10 @@ main (int argc, char *argv[]) hwloc_utils_check_api_version(callname); /* enable verbose backends */ - putenv((char *) "HWLOC_XML_VERBOSE=1"); - putenv((char *) "HWLOC_SYNTHETIC_VERBOSE=1"); + if (!getenv("HWLOC_XML_VERBOSE")) + putenv((char *) "HWLOC_XML_VERBOSE=1"); + if (!getenv("HWLOC_SYNTHETIC_VERBOSE")) + putenv((char *) "HWLOC_SYNTHETIC_VERBOSE=1"); err = hwloc_topology_init (&topology); if (err) @@ -522,7 +524,7 @@ main (int argc, char *argv[]) hwloc_topology_set_flags(topology, flags); if (input) { - err = hwloc_utils_enable_input_format(topology, input, &input_format, verbose_mode, callname); + err = hwloc_utils_enable_input_format(topology, flags, input, &input_format, verbose_mode, callname); if (err) return err; } diff --git a/utils/hwloc/hwloc-patch.c b/utils/hwloc/hwloc-patch.c index 5cff9121c6..9820dcff0b 100644 --- 
a/utils/hwloc/hwloc-patch.c +++ b/utils/hwloc/hwloc-patch.c @@ -78,7 +78,8 @@ int main(int argc, char *argv[]) hwloc_utils_check_api_version(callname); - putenv((char *) "HWLOC_XML_VERBOSE=1"); + if (!getenv("HWLOC_XML_VERBOSE")) + putenv((char *) "HWLOC_XML_VERBOSE=1"); while (argc && *argv[0] == '-') { if (!strcmp (argv[0], "-R") || !strcmp (argv[0], "--reverse")) { diff --git a/utils/hwloc/misc.h b/utils/hwloc/misc.h index 98a36a1036..ab83e71539 100644 --- a/utils/hwloc/misc.h +++ b/utils/hwloc/misc.h @@ -81,7 +81,8 @@ enum hwloc_utils_input_format { HWLOC_UTILS_INPUT_XML, HWLOC_UTILS_INPUT_FSROOT, HWLOC_UTILS_INPUT_SYNTHETIC, - HWLOC_UTILS_INPUT_CPUID + HWLOC_UTILS_INPUT_CPUID, + HWLOC_UTILS_INPUT_SHMEM }; static __hwloc_inline enum hwloc_utils_input_format @@ -93,6 +94,8 @@ hwloc_utils_parse_input_format(const char *name, const char *callname) return HWLOC_UTILS_INPUT_XML; else if (!hwloc_strncasecmp(name, "fsroot", 1)) return HWLOC_UTILS_INPUT_FSROOT; + else if (!hwloc_strncasecmp(name, "shmem", 5)) + return HWLOC_UTILS_INPUT_SHMEM; else if (!hwloc_strncasecmp(name, "synthetic", 1)) return HWLOC_UTILS_INPUT_SYNTHETIC; else if (!hwloc_strncasecmp(name, "cpuid", 1)) @@ -177,6 +180,12 @@ hwloc_utils_autodetect_input_format(const char *input, int verbose) return HWLOC_UTILS_INPUT_SYNTHETIC; } if (S_ISREG(inputst.st_mode)) { + size_t len = strlen(input); + if (len >= 6 && !strcmp(input+len-6, ".shmem")) { + if (verbose > 0) + printf("assuming `%s' is a shmem topology file\n", input); + return HWLOC_UTILS_INPUT_SHMEM; + } if (verbose > 0) printf("assuming `%s' is a XML file\n", input); return HWLOC_UTILS_INPUT_XML; @@ -208,7 +217,7 @@ hwloc_utils_autodetect_input_format(const char *input, int verbose) } static __hwloc_inline int -hwloc_utils_enable_input_format(struct hwloc_topology *topology, +hwloc_utils_enable_input_format(struct hwloc_topology *topology, unsigned long flags, const char *input, enum hwloc_utils_input_format *input_format, int verbose, const char 
*callname) @@ -246,9 +255,12 @@ hwloc_utils_enable_input_format(struct hwloc_topology *topology, putenv((char *) "HWLOC_DUMPED_HWDATA_DIR=/var/run/hwloc"); env = getenv("HWLOC_COMPONENTS"); if (env) - fprintf(stderr, "Cannot force linux and linuxio components first because HWLOC_COMPONENTS environment variable is already set to %s.\n", env); + fprintf(stderr, "Cannot force linux component first because HWLOC_COMPONENTS environment variable is already set to %s.\n", env); else - putenv((char *) "HWLOC_COMPONENTS=linux,linuxio,stop"); + putenv((char *) "HWLOC_COMPONENTS=linux,pci,stop"); + /* normally-set flags are overriden by envvar-forced backends */ + if (flags & HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM) + putenv((char *) "HWLOC_THISSYSTEM=1"); #else /* HWLOC_LINUX_SYS */ fprintf(stderr, "This installation of hwloc does not support changing the file-system root, sorry.\n"); exit(EXIT_FAILURE); @@ -271,6 +283,9 @@ hwloc_utils_enable_input_format(struct hwloc_topology *topology, fprintf(stderr, "Cannot force x86 component first because HWLOC_COMPONENTS environment variable is already set to %s.\n", env); else putenv((char *) "HWLOC_COMPONENTS=x86,stop"); + /* normally-set flags are overriden by envvar-forced backends */ + if (flags & HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM) + putenv((char *) "HWLOC_THISSYSTEM=1"); #else fprintf(stderr, "This installation of hwloc does not support loading from a cpuid dump, sorry.\n"); exit(EXIT_FAILURE); @@ -285,6 +300,9 @@ hwloc_utils_enable_input_format(struct hwloc_topology *topology, } break; + case HWLOC_UTILS_INPUT_SHMEM: + break; + case HWLOC_UTILS_INPUT_DEFAULT: assert(0); } @@ -293,15 +311,20 @@ hwloc_utils_enable_input_format(struct hwloc_topology *topology, } static __hwloc_inline void -hwloc_utils_print_distance_matrix(FILE *output, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *matrix, int logical) +hwloc_utils_print_distance_matrix(FILE *output, unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *matrix, int logical, int 
show_types) { unsigned i, j; /* column header */ fprintf(output, " index"); for(j=0; jlogical_index : objs[j]->os_index)); + if (show_types) + fprintf(output, " %s:%d", + hwloc_obj_type_string(objs[j]->type), + (int) (logical ? objs[j]->logical_index : objs[j]->os_index)); + else + fprintf(output, " % 5d", + (int) (logical ? objs[j]->logical_index : objs[j]->os_index)); } fprintf(output, "\n"); @@ -433,6 +456,22 @@ hwloc_utils_userdata_export_cb(void *reserved, hwloc_topology_t topology, hwloc_ } } +/* to be called when importing from shmem with non-NULL userdata pointing to stuff in the other process */ +static __hwloc_inline void +hwloc_utils_userdata_clear_recursive(hwloc_obj_t obj) +{ + hwloc_obj_t child; + obj->userdata= NULL; + for_each_child(child, obj) + hwloc_utils_userdata_clear_recursive(child); + for_each_memory_child(child, obj) + hwloc_utils_userdata_clear_recursive(child); + for_each_io_child(child, obj) + hwloc_utils_userdata_clear_recursive(child); + for_each_misc_child(child, obj) + hwloc_utils_userdata_clear_recursive(child); +} + /* must be called once the caller has removed its own userdata */ static __hwloc_inline void hwloc_utils_userdata_free(hwloc_obj_t obj) diff --git a/utils/hwloc/test-fake-plugin.sh.in b/utils/hwloc/test-fake-plugin.sh.in index 69a777cb78..ddbd14dabc 100644 --- a/utils/hwloc/test-fake-plugin.sh.in +++ b/utils/hwloc/test-fake-plugin.sh.in @@ -2,22 +2,20 @@ #-*-sh-*- # -# Copyright © 2009-2014 Inria. All rights reserved. +# Copyright © 2009-2019 Inria. All rights reserved. # Copyright © 2009, 2011 Université Bordeaux # Copyright © 2014 Cisco Systems, Inc. All rights reserved. # See COPYING in top-level directory. 
# HWLOC_top_builddir="@HWLOC_top_builddir@" -builddir="$HWLOC_top_builddir/utils/lstopo" -lstopo="$builddir/lstopo-no-graphics" +builddir="$HWLOC_top_builddir" +lstopo="$builddir/utils/lstopo/lstopo-no-graphics" +hcalc="$builddir/utils/hwloc/hwloc-calc" HWLOC_PLUGINS_PATH=${HWLOC_top_builddir}/hwloc export HWLOC_PLUGINS_PATH -HWLOC_DEBUG_FAKE_COMPONENT=1 -export HWLOC_DEBUG_FAKE_COMPONENT - HWLOC_DEBUG_CHECK=1 export HWLOC_DEBUG_CHECK @@ -35,6 +33,17 @@ file="$tmp/test-fake-plugin.output" set -e +echo "Checking that the tweak phase restricts to a single PU and single NUMA" +HWLOC_DEBUG_FAKE_COMPONENT_TWEAK=1 +export HWLOC_DEBUG_FAKE_COMPONENT_TWEAK + +test `$hcalc -N pu root` = 1 +test `$hcalc -N numa root` = 1 + +echo "Checking that the init/instantiate/finalize callbacks are invoked" +HWLOC_DEBUG_FAKE_COMPONENT=1 +export HWLOC_DEBUG_FAKE_COMPONENT + $lstopo > $file grep "fake component initialized" $file \ diff --git a/utils/hwloc/test-hwloc-calc.output b/utils/hwloc/test-hwloc-calc.output index 0d6d786172..e327f2505a 100644 --- a/utils/hwloc/test-hwloc-calc.output +++ b/utils/hwloc/test-hwloc-calc.output @@ -2,6 +2,7 @@ 0xffffffffffffffff 0x0000000f +0x11111111,0x0 0x00000f0c 0x0000800a diff --git a/utils/hwloc/test-hwloc-calc.sh.in b/utils/hwloc/test-hwloc-calc.sh.in index 11ed58f646..357c0d2e2d 100644 --- a/utils/hwloc/test-hwloc-calc.sh.in +++ b/utils/hwloc/test-hwloc-calc.sh.in @@ -3,7 +3,7 @@ # # Copyright © 2009 CNRS -# Copyright © 2009-2018 Inria. All rights reserved. +# Copyright © 2009-2019 Inria. All rights reserved. # Copyright © 2009, 2011 Université Bordeaux # Copyright © 2014 Cisco Systems, Inc. All rights reserved. # See COPYING in top-level directory. 
@@ -40,6 +40,7 @@ set -e $calc --if synthetic --input "node:4 core:4 pu:4" all --taskset echo $calc --if synthetic --input "node:4 core:4 pu:4" 0xf + $calc --if synthetic --input "node:4 core:4 pu:4" --no-smt node:2-3 $calc --if synthetic --input "node:4 core:4 pu:4" 0xf ~0x3 0xff0 '^0xf0' $calc --if synthetic --input "node:4 core:4 pu:4" core:0 pu:15 ~pu:0 '^pu:2' echo diff --git a/utils/hwloc/test-hwloc-distrib.output b/utils/hwloc/test-hwloc-distrib.output index 83194d171f..14df40815e 100644 --- a/utils/hwloc/test-hwloc-distrib.output +++ b/utils/hwloc/test-hwloc-distrib.output @@ -1,11 +1,3 @@ -0x0000000f -0x000000f0 - -0x00000003 -0x0000000c -0x00000030 -0x000000c0 - 0x00000001 0x00000002 0x00000004 @@ -16,112 +8,108 @@ 0x00000080 0x00000001 -0x00000001 -0x00000002 0x00000002 0x00000004 -0x00000004 0x00000008 0x00000010 -0x00000010 0x00000020 0x00000040 -0x00000040 0x00000080 - -0x00000001 0x00000001 0x00000002 -0x00000002 0x00000004 -0x00000004 -0x00000008 0x00000008 0x00000010 -0x00000010 -0x00000020 0x00000020 0x00000040 +0x00000080 +0x00000001 +0x00000002 +0x00000004 +0x00000008 +0x00000010 +0x00000020 0x00000040 0x00000080 +0x00000001 +0x00000002 +0x00000004 +0x00000008 +0x00000010 +0x00000020 +0x00000040 0x00000080 -0x00000007 -0x000001f8 -0x0003fe00 -0x07fc0000 - 0x00000001 +0x00000002 +0x00000004 0x00000008 -0x00000200 -0x00040000 - -0x07000000 -0x00fc0000 -0x0003fe00 -0x000001ff 0x04000000 +0x02000000 +0x01000000 0x00800000 -0x00020000 -0x00000100 -0x000000ff -0x0000ff00 +0 +1 +2 +3 -0x00000001 -0x00000100 +0 +1 +2 +3 -0x00008000 -0x00000080 +0x00000001 +0x00000002 +0x00000004 +0x00000008 -0x000000ff -0x0000ff00 -0xffff0000 -0x0000ffff,0x0 -0xffff0000,0x0 -0x000000ff,,0x0 -0x0000ff00,,0x0 -0xffff0000,,0x0 -0x0000ffff,,,0x0 -0xffff0000,,,0x0 -0x000000ff,,,,0x0 -0x0000ff00,,,,0x0 -0xffff0000,,,,0x0 -0x0000ffff,,,,,0x0 -0xffff0000,,,,,0x0 -0x0000ffff,,,,,,0x0 -0xffff0000,,,,,,0x0 -0x0000ffff,,,,,,,0x0 -0xffff0000,,,,,,,0x0 +0x00000001 +0x00000001,,0x0 
+0x00000001,,,,0x0 +0x00000001,,,,,,0x0 +0x00000010 +0x00000010,,0x0 +0x00000010,,,,0x0 +0x00000010,,,,,,0x0 0x00000001 -0x00000002 -0x0000000c -0x00000030 -0x000000c0 -0x00000300 -0x00000c00 -0x00003000 -0x0000c000 +0x00000001,,0x0 +0x00000001,,,,0x0 +0x00000001,,,,,,0x0 +0x00010000 +0x00010000,,0x0 +0x00010000,,,,0x0 +0x00010000,,,,,,0x0 +0x00000001,0x0 +0x00000001,,,0x0 +0x00000001,,,,,0x0 +0x00000001,,,,,,,0x0 +0x00010000,0x0 +0x00010000,,,0x0 +0x00010000,,,,,0x0 +0x00010000,,,,,,,0x0 +0x00000010 +0x00000010,,0x0 +0x00000010,,,,0x0 0x00000001 -0x00000006 -0x00000018 -0x00000060 +0x00000002 +0x00000004 +0x00000008 +0x00000010 +0x00000020 +0x00000040 0x00000080 -0x00000300 -0x00000c00 -0x00003000 -0x0000c000 +0x00000100 -0x00000003 -0x00000003 -0x0000000c -0x00000030 -0x000000c0 -0x00000300 -0x00000c00 -0x00003000 -0x0000c000 +8 +196 +132 +68 +4 +192 +128 +64 diff --git a/utils/hwloc/test-hwloc-distrib.sh.in b/utils/hwloc/test-hwloc-distrib.sh.in index b6e5a4ae8c..27adfbebc2 100644 --- a/utils/hwloc/test-hwloc-distrib.sh.in +++ b/utils/hwloc/test-hwloc-distrib.sh.in @@ -35,38 +35,27 @@ file="$tmp/test-hwloc-distrib.output" set -e ( - $distrib --if synthetic --input "2 2 2" 2 + $distrib pu --if synthetic --input "4 4 4 4" -n 8 echo - $distrib --if synthetic --input "2 2 2" 4 + $distrib pu --if synthetic --input "1 2 2 2" -n 32 echo - $distrib --if synthetic --input "2 2 2" 8 + $distrib pu --if synthetic --input "3 3 3" -n 4 --single echo - $distrib --if synthetic --input "2 2 2" 13 + $distrib pu --if synthetic --input "3 3 3" -n 4 --reverse echo - $distrib --if synthetic --input "2 2 2" 16 + $distrib pu --if synthetic --input "3 3 3" -n 4 --logical-index echo - $distrib --if synthetic --input "3 3 3" 4 + $distrib pu --if synthetic --input "3 3 3" -n 4 --physical-index echo - $distrib --if synthetic --input "3 3 3" 4 --single + $distrib pu --if synthetic --input "3 3 3" -n 4 --from 0 echo - $distrib --if synthetic --input "3 3 3" 4 --reverse + $distrib 
pu:core:package --if synthetic --input "4 4 4 4" -n 8 + echo + $distrib scatter pu --if synthetic --input "4 4 4 4" -n 19 echo - $distrib --if synthetic --input "3 3 3" 4 --reverse --single + $distrib round-robin pu --if synthetic --input "2 2 2 2" -n 9 echo - $distrib --if synthetic --input "4 4" 2 - echo - $distrib --if synthetic --input "4 4" 2 --single - echo - $distrib --if synthetic --input "4 4" 2 --reverse --single - echo - $distrib --if synthetic --input "4 4 4 4" 19 - echo - - $distrib --if synthetic --input "2 2 2 2" 9 - echo - $distrib --if synthetic --input "2 2 2 2" --from pu 9 - echo - $distrib --if synthetic --input "2 2 2 2" --to core 9 + $distrib pu:core:package --if synthetic --input "4 4 4 4" -n 8 --logical-index --single --reverse --from 8 echo ) > "$file" @DIFF@ @HWLOC_DIFF_U@ @HWLOC_DIFF_W@ $srcdir/test-hwloc-distrib.output "$file" diff --git a/utils/lstopo/Makefile.am b/utils/lstopo/Makefile.am index ec72e0e300..310a2c6f1b 100644 --- a/utils/lstopo/Makefile.am +++ b/utils/lstopo/Makefile.am @@ -30,6 +30,10 @@ lstopo_no_graphics_SOURCES = \ lstopo-text.c \ lstopo-xml.c +if !HWLOC_HAVE_WINDOWS +lstopo_no_graphics_SOURCES += lstopo-shmem.c +endif + lstopo_no_graphics_CFLAGS = $(AM_CFLAGS) lstopo_no_graphics_LDADD = $(LDADD) -lm $(HWLOC_TERMCAP_LIBS) @@ -60,10 +64,11 @@ man1_pages = lstopo-no-graphics.1 EXTRA_DIST += $(man1_pages:.1=.1in) nodist_man_MANS = $(man1_pages) -if !HWLOC_HAVE_MINGW32 TESTS = \ test-lstopo.sh -endif !HWLOC_HAVE_MINGW32 +if HWLOC_HAVE_LINUX +TESTS += test-lstopo-shmem.sh +endif HWLOC_HAVE_LINUX if HWLOC_HAVE_CAIRO # only installed when lstopo is built with Cairo/X11 support diff --git a/utils/lstopo/lstopo-draw.c b/utils/lstopo/lstopo-draw.c index 62b8a4ccfa..484b0e2f04 100644 --- a/utils/lstopo/lstopo-draw.c +++ b/utils/lstopo/lstopo-draw.c @@ -33,6 +33,7 @@ struct lstopo_color BLACK_COLOR = { 0, 0, 0, 0 }; struct lstopo_color WHITE_COLOR = { 0xff, 0xff, 0xff, 0 }; struct lstopo_color PACKAGE_COLOR = { 
DARK_EPOXY_R_COLOR, DARK_EPOXY_G_COLOR, DARK_EPOXY_B_COLOR, 0 }; +struct lstopo_color DIE_COLOR = { EPOXY_R_COLOR, EPOXY_G_COLOR, EPOXY_B_COLOR, 0 }; struct lstopo_color MEMORY_COLOR = { 0xef, 0xdf, 0xde, 0 }; struct lstopo_color MEMORIES_COLOR = { 0xf2, 0xe8, 0xe8, 0}; /* slightly lighter than MEMORY_COLOR */ struct lstopo_color CORE_COLOR = { 0xbe, 0xbe, 0xbe, 0 }; @@ -77,6 +78,7 @@ declare_colors(struct lstopo_output *output) declare_color(output, &BLACK_COLOR); declare_color(output, &WHITE_COLOR); declare_color(output, &PACKAGE_COLOR); + declare_color(output, &DIE_COLOR); declare_color(output, &MEMORY_COLOR); declare_color(output, &MEMORIES_COLOR); declare_color(output, &CORE_COLOR); @@ -502,6 +504,7 @@ place_children(struct lstopo_output *loutput, hwloc_obj_t parent, unsigned children_width = 0, children_height = 0; unsigned above_children_width, above_children_height; unsigned existing_kinds; + int normal_children_are_PUs; hwloc_obj_t child; int ncstate; unsigned i; @@ -537,23 +540,27 @@ place_children(struct lstopo_output *loutput, hwloc_obj_t parent, orient = LSTOPO_ORIENT_HORIZ; } - /* recurse into children to prepare their sizes */ + /* recurse into children to prepare their sizes, + * and check whether all normal children are PUs. 
*/ + normal_children_are_PUs = (parent->arity > 0); for(i = 0, child = next_child(loutput, parent, LSTOPO_CHILD_KIND_ALL, NULL, &ncstate); child; i++, child = next_child(loutput, parent, LSTOPO_CHILD_KIND_ALL, child, &ncstate)) { get_type_fun(child->type)(loutput, child, 0, 0, 0); + if (hwloc_obj_type_is_normal(child->type) && child->type != HWLOC_OBJ_PU) + normal_children_are_PUs = 0; } if (!i) return; /* no separator between PUs */ - if ((unsigned)parent->depth == loutput->depth-2) + if (normal_children_are_PUs) normal_children_separator = 0; /* add separator between a cache parent and its children */ if (hwloc_obj_type_is_cache(parent->type) || parent->type == HWLOC_OBJ_MEMCACHE) { - if ((unsigned)parent->depth == loutput->depth-2 || parent->type == HWLOC_OBJ_MEMCACHE) + if (normal_children_are_PUs || parent->type == HWLOC_OBJ_MEMCACHE) /* except between cache parent and PU children */ separator_below_cache = 0; /* update children placement */ @@ -718,7 +725,7 @@ lstopo_obj_snprintf(struct lstopo_output *loutput, char *text, size_t textlen, h /* by default we show logical+physical for PU/NUMA */ idx = obj->logical_index; indexprefix = " L#"; - } else if (obj->type == HWLOC_OBJ_PACKAGE || obj->type == HWLOC_OBJ_CORE) { + } else if (obj->type == HWLOC_OBJ_PACKAGE || obj->type == HWLOC_OBJ_DIE || obj->type == HWLOC_OBJ_CORE) { /* logical only for package+core (so that we see easily how many packages/cores there are */ idx = obj->logical_index; indexprefix = " L#"; @@ -877,6 +884,10 @@ lstopo_set_object_color(struct lstopo_output *loutput, s->bg = &PACKAGE_COLOR; break; + case HWLOC_OBJ_DIE: + s->bg = &DIE_COLOR; + break; + case HWLOC_OBJ_CORE: s->bg = &CORE_COLOR; break; @@ -1500,6 +1511,7 @@ get_type_fun(hwloc_obj_type_t type) case HWLOC_OBJ_MACHINE: case HWLOC_OBJ_NUMANODE: case HWLOC_OBJ_PACKAGE: + case HWLOC_OBJ_DIE: case HWLOC_OBJ_CORE: case HWLOC_OBJ_PU: case HWLOC_OBJ_GROUP: diff --git a/utils/lstopo/lstopo-no-graphics.1in 
b/utils/lstopo/lstopo-no-graphics.1in index 9087b448fa..73ea86f864 100644 --- a/utils/lstopo/lstopo-no-graphics.1in +++ b/utils/lstopo/lstopo-no-graphics.1in @@ -90,13 +90,13 @@ If the destination file already exists, overwrite it. Display hwloc logical indexes of all objects, with prefix "L#". . By default, both logical and physical/OS indexes are displayed for PUs and NUMA nodes, -logical only for cores and packages, and no index for other types. +logical only for cores, dies and packages, and no index for other types. .TP \fB\-p\fR \fB\-\-physical\fR Display OS/physical indexes of all objects, with prefix "P#". . By default, both logical and physical/OS indexes are displayed for PUs and NUMA nodes, -logical only for cores and packages, and no index for other types. +logical only for cores, dies and packages, and no index for other types. .TP \fB\-c\fR \fB\-\-cpuset\fR Display the cpuset of each object. @@ -134,6 +134,10 @@ Note also that the top-level object type cannot be ignored (usually Machine or S \fB\-\-ignore\fR This is the old way to specify \fB-\-filter :none\fR. .TP +\fB\-\-no\-smt\fR +Ignore PUs. +This is identical to \fB-\-filter PU:none\fR. +.TP \fB\-\-no\-caches\fR Do not show caches. This is identical to \fB-\-filter cache:none\fR. @@ -364,6 +368,8 @@ Report version and exit. \fB\-h\fR \fB\-\-help\fR Display help message and exit. . +.\" --shmem-output-addr is undocumented on purpose +. .\" ************************** .\" Description Section .\" ************************** diff --git a/utils/lstopo/lstopo-shmem.c b/utils/lstopo/lstopo-shmem.c new file mode 100644 index 0000000000..a17d2a469a --- /dev/null +++ b/utils/lstopo/lstopo-shmem.c @@ -0,0 +1,185 @@ +/* + * Copyright © 2018-2019 Inria. All rights reserved. + * See COPYING in top-level directory. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "lstopo.h" + +struct lstopo_shmem_header { +#define LSTOPO_SHMEM_HEADER_VERSION 1U + uint32_t header_version; + uint32_t header_length; + uint64_t mmap_address; + uint64_t mmap_length; + uint64_t file_offset; +}; + +extern uint64_t shmem_output_addr; + +#ifndef MAP_ANONYMOUS /* for Mac OS X 9 */ +#define MAP_ANONYMOUS MAP_ANON +#endif + +static unsigned long +find_mmap_addr(unsigned long length) +{ + unsigned long addr; + void *tmp_mmap; + int err; + + /* try to find a good address starting from something in the middle of the entire/full address space */ +#if SIZEOF_VOID_P == 8 + addr = 0x8000000000000000UL; +#else + addr = 0x80000000UL; +#endif + printf("Testing mmaps to find room for length %lu\n", length); + +again: + tmp_mmap = mmap((void*)(uintptr_t)addr, length, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0); + if (tmp_mmap != MAP_FAILED) { + err = munmap((void*)(uintptr_t)tmp_mmap, length); + assert(!err); + if (tmp_mmap == (void*)(uintptr_t) addr) { + /* worked! 
*/ + printf(" test mmap at 0x%lx succeeded, let's use that!\n", addr); + return addr; + } + printf(" test mmap at 0x%lx returned another address\n", addr); + } else + printf(" test mmap at 0x%lx failed (errno %d)\n", addr, errno); + /* couldn't map there, try again with a smaller address */ + addr >>= 1; + if (addr) + goto again; + + return 0; +} + +int +output_shmem(struct lstopo_output *loutput, const char *filename) +{ + struct lstopo_shmem_header header; + size_t shmem_length; + struct stat st; + int fd, err; + + if (!filename || !strcasecmp(filename, "-.shmem")) { + fprintf(stderr, "Cannot export shmem topology to stdout.\n"); + return -1; + } + + if (!stat(filename, &st) && !loutput->overwrite) { + fprintf(stderr, "Failed to export shmem topology to %s (%s)\n", filename, strerror(EEXIST)); + return -1; + } + + err = hwloc_shmem_topology_get_length(loutput->topology, &shmem_length, 0); + if (err < 0) { + if (errno == ENOSYS) + fprintf(stderr, "shmem topology not supported\n"); /* this line must match the grep line in test-lstopo-shmem */ + else + fprintf(stderr, "Failed to compute shmem topology export length\n"); + return -1; + } + + fd = open(filename, O_RDWR|O_TRUNC|O_CREAT, S_IRUSR|S_IWUSR); + if (fd < 0) { + fprintf(stderr, "Failed to open shmem topology file %s (%s)\n", filename, strerror(errno)); + return -1; + } + + if (!loutput->shmem_output_addr) + loutput->shmem_output_addr = find_mmap_addr(shmem_length); + if (!loutput->shmem_output_addr) { + fprintf(stderr, "Failed to find a shmem topology mmap address\n"); /* this line must match the grep line in test-lstopo-shmem */ + close(fd); + unlink(filename); + return -1; + } + + header.header_version = LSTOPO_SHMEM_HEADER_VERSION; + header.header_length = sizeof(header); + header.mmap_address = loutput->shmem_output_addr; + header.mmap_length = shmem_length; + header.file_offset = hwloc_getpagesize(); + err = write(fd, &header, sizeof(header)); + if (err != sizeof(header)) { + fprintf(stderr, "Failed to 
write shmem topology header\n"); + close(fd); + unlink(filename); + return -1; + } + + if (hwloc_shmem_topology_write(loutput->topology, fd, header.file_offset, (void*)(uintptr_t)loutput->shmem_output_addr, shmem_length, 0) < 0) { + if (errno == EBUSY) + fprintf(stderr, "Failed to export shmem topology, memory range is busy\n"); /* this line must match the grep line in test-lstopo-shmem */ + else + fprintf(stderr, "Failed to export shmem topology to %s (%s)\n", filename, strerror(errno)); + close(fd); + unlink(filename); + return -1; + } + + close(fd); + + printf("Exported shmem topology to %s for mmap address 0x%lx length %lu\n", filename, loutput->shmem_output_addr, (unsigned long) shmem_length); + + return 0; +} + +int lstopo_shmem_adopt(const char *input, hwloc_topology_t *topologyp) +{ + hwloc_topology_t adopted; + struct lstopo_shmem_header header; + int fd, err; + + fd = open(input, O_RDONLY); + if (fd < 0) + return -1; + + err = read(fd, &header, sizeof(header)); + if (err < (int) sizeof(header)) { + fprintf(stderr, "Failed to read shmem topology header\n"); + close(fd); + return -1; + } + + if (header.header_version != LSTOPO_SHMEM_HEADER_VERSION + || header.header_length != sizeof(header)) { + fprintf(stderr, "Unexpected shmem topology header version %u length %u (instead of %u %u)\n", + header.header_version, header.header_length, + LSTOPO_SHMEM_HEADER_VERSION, (unsigned) sizeof(header)); + close(fd); + return -1; + } + + err = hwloc_shmem_topology_adopt(&adopted, fd, header.file_offset, (void*)(uintptr_t)header.mmap_address, header.mmap_length, 0); + close(fd); + if (err < 0) { + if (errno == EBUSY) + fprintf(stderr, "Failed to adopt shmem topology, memory range is busy\n"); /* this line must match the grep line in test-lstopo-shmem */ + else + fprintf(stderr, "Failed to adopt shmem topology (%s)\n", strerror(errno)); + return -1; + } + + err = hwloc_topology_dup(topologyp, adopted); + hwloc_topology_destroy(adopted); + if (err < 0) { + fprintf(stderr, 
"Failed to duplicate adopted shmem topology (%s)\n", strerror(errno)); + return -1; + } + + return 0; +} diff --git a/utils/lstopo/lstopo-text.c b/utils/lstopo/lstopo-text.c index 0aae5e4df7..b5f0bc363f 100644 --- a/utils/lstopo/lstopo-text.c +++ b/utils/lstopo/lstopo-text.c @@ -225,13 +225,20 @@ static void output_distances(struct lstopo_output *loutput) if (!err) { for(j=0; jkind & HWLOC_DISTANCES_KIND_MEANS_LATENCY) ? "latency" : (dist[j]->kind & HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH) ? "bandwidth" : "distance"; - fprintf(output, "Relative %s matrix (kind %lu) between %u %ss (depth %d) by %s indexes:\n", - kindmeans, dist[j]->kind, - dist[j]->nbobjs, - hwloc_obj_type_string(dist[j]->objs[0]->type), - dist[j]->objs[0]->depth, - index_type != LSTOPO_INDEX_TYPE_PHYSICAL ? "logical" : "physical"); - hwloc_utils_print_distance_matrix(output, dist[j]->nbobjs, dist[j]->objs, dist[j]->values, index_type != LSTOPO_INDEX_TYPE_PHYSICAL); + if (dist[j]->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) { + fprintf(output, "Relative %s matrix (name %s kind %lu) between %u heterogeneous objects by %s indexes:\n", + kindmeans, hwloc_distances_get_name(topology, dist[j]), dist[j]->kind, + dist[j]->nbobjs, + index_type != LSTOPO_INDEX_TYPE_PHYSICAL ? "logical" : "physical"); + } else { + fprintf(output, "Relative %s matrix (name %s kind %lu) between %u %ss (depth %d) by %s indexes:\n", + kindmeans, hwloc_distances_get_name(topology, dist[j]), dist[j]->kind, + dist[j]->nbobjs, + hwloc_obj_type_string(dist[j]->objs[0]->type), + dist[j]->objs[0]->depth, + index_type != LSTOPO_INDEX_TYPE_PHYSICAL ? 
"logical" : "physical"); + } + hwloc_utils_print_distance_matrix(output, dist[j]->nbobjs, dist[j]->objs, dist[j]->values, index_type != LSTOPO_INDEX_TYPE_PHYSICAL, dist[j]->kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES); hwloc_distances_release(topology, dist[j]); } } diff --git a/utils/lstopo/lstopo.c b/utils/lstopo/lstopo.c index ad5d87118b..f5d015af8b 100644 --- a/utils/lstopo/lstopo.c +++ b/utils/lstopo/lstopo.c @@ -11,6 +11,7 @@ #ifdef HWLOC_LINUX_SYS #include "hwloc/linux.h" #endif /* HWLOC_LINUX_SYS */ +#include "hwloc/shmem.h" #include #include @@ -372,6 +373,7 @@ void usage(const char *name, FILE *where) fprintf (where, " --filter : Filter objects of the given type, or all.\n"); fprintf (where, " may be `all' (keep all), `none' (remove all), `structure' or `important'\n"); fprintf (where, " --ignore Ignore objects of the given type\n"); + fprintf (where, " --no-smt Ignore PUs\n"); fprintf (where, " --no-caches Do not show caches\n"); fprintf (where, " --no-useless-caches Do not show caches which do not have a hierarchical\n" " impact\n"); @@ -424,6 +426,7 @@ void usage(const char *name, FILE *where) " Set flags during the XML topology export\n"); fprintf (where, " --export-synthetic-flags \n" " Set flags during the synthetic topology export\n"); + /* --shmem-output-addr is undocumented on purpose */ fprintf (where, " --ps --top Display processes within the hierarchy\n"); fprintf (where, " --version Report version and exit\n"); } @@ -504,6 +507,7 @@ enum output_format { LSTOPO_OUTPUT_CAIROSVG, LSTOPO_OUTPUT_NATIVESVG, LSTOPO_OUTPUT_XML, + LSTOPO_OUTPUT_SHMEM, LSTOPO_OUTPUT_ERROR }; @@ -535,6 +539,8 @@ parse_output_format(const char *name, char *callname __hwloc_attribute_unused) return LSTOPO_OUTPUT_NATIVESVG; else if (!strcasecmp(name, "xml")) return LSTOPO_OUTPUT_XML; + else if (!strcasecmp(name, "shmem")) + return LSTOPO_OUTPUT_SHMEM; else return LSTOPO_OUTPUT_ERROR; } @@ -598,6 +604,7 @@ main (int argc, char *argv[]) loutput.export_synthetic_flags = 
0; loutput.export_xml_flags = 0; + loutput.shmem_output_addr = 0; loutput.legend = 1; loutput.legend_append = NULL; @@ -641,8 +648,10 @@ main (int argc, char *argv[]) loutput.show_disallowed = 1; /* enable verbose backends */ - putenv((char *) "HWLOC_XML_VERBOSE=1"); - putenv((char *) "HWLOC_SYNTHETIC_VERBOSE=1"); + if (!getenv("HWLOC_XML_VERBOSE")) + putenv((char *) "HWLOC_XML_VERBOSE=1"); + if (!getenv("HWLOC_SYNTHETIC_VERBOSE")) + putenv((char *) "HWLOC_SYNTHETIC_VERBOSE=1"); /* Use localized time prints, and utf-8 characters in the ascii output */ #ifdef HAVE_SETLOCALE @@ -770,6 +779,9 @@ main (int argc, char *argv[]) hwloc_topology_set_type_filter(topology, type, HWLOC_TYPE_FILTER_KEEP_NONE); opt = 1; } + else if (!strcmp (argv[0], "--no-smt")) { + loutput.ignore_pus = 1; + } else if (!strcmp (argv[0], "--no-caches")) { hwloc_topology_set_cache_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MEMCACHE, HWLOC_TYPE_FILTER_KEEP_NONE); @@ -1064,6 +1076,13 @@ main (int argc, char *argv[]) opt = 1; } + else if (!strcmp (argv[0], "--shmem-output-addr")) { + if (argc < 2) + goto out_usagefailure; + loutput.shmem_output_addr = strtoull(argv[1], NULL, 0); + opt = 1; + } + else if (hwloc_utils_lookup_input_option(argv, argc, &opt, &input, &input_format, callname)) { @@ -1107,7 +1126,7 @@ main (int argc, char *argv[]) } if (input) { - err = hwloc_utils_enable_input_format(topology, input, &input_format, loutput.verbose_mode > 1, callname); + err = hwloc_utils_enable_input_format(topology, flags, input, &input_format, loutput.verbose_mode > 1, callname); if (err) goto out_with_topology; } @@ -1160,10 +1179,26 @@ main (int argc, char *argv[]) clock_gettime(CLOCK_MONOTONIC, &ts1); #endif - err = hwloc_topology_load (topology); - if (err) { - fprintf(stderr, "hwloc_topology_load() failed (%s).\n", strerror(errno)); - goto out_with_topology; + if (input_format == HWLOC_UTILS_INPUT_SHMEM) { +#ifdef HWLOC_WIN_SYS + 
fprintf(stderr, "shmem topology not supported\n"); /* this line must match the grep line in test-lstopo-shmem */ + goto out; +#else /* !HWLOC_WIN_SYS */ + /* load from shmem, and duplicate onto topology, so that we may modify it */ + hwloc_topology_destroy(topology); + err = lstopo_shmem_adopt(input, &topology); + if (err < 0) + goto out; + hwloc_utils_userdata_clear_recursive(hwloc_get_root_obj(topology)); +#endif /* !HWLOC_WIN_SYS */ + + } else { + /* normal load */ + err = hwloc_topology_load (topology); + if (err) { + fprintf(stderr, "hwloc_topology_load() failed (%s).\n", strerror(errno)); + goto out_with_topology; + } } if (allow_flags) { @@ -1286,6 +1321,11 @@ main (int argc, char *argv[]) case LSTOPO_OUTPUT_XML: output_func = output_xml; break; +#ifndef HWLOC_WIN_SYS + case LSTOPO_OUTPUT_SHMEM: + output_func = output_shmem; + break; +#endif default: fprintf(stderr, "file format not supported\n"); goto out_usagefailure; diff --git a/utils/lstopo/lstopo.h b/utils/lstopo/lstopo.h index 3faf03cbdf..ac3585b352 100644 --- a/utils/lstopo/lstopo.h +++ b/utils/lstopo/lstopo.h @@ -59,6 +59,7 @@ struct lstopo_output { /* export config */ unsigned long export_synthetic_flags; unsigned long export_xml_flags; + uint64_t shmem_output_addr; /* legend */ int legend; @@ -190,7 +191,9 @@ struct lstopo_obj_userdata { }; typedef int output_method (struct lstopo_output *output, const char *filename); -extern output_method output_console, output_synthetic, output_ascii, output_fig, output_png, output_pdf, output_ps, output_nativesvg, output_cairosvg, output_x11, output_windows, output_xml; +extern output_method output_console, output_synthetic, output_ascii, output_fig, output_png, output_pdf, output_ps, output_nativesvg, output_cairosvg, output_x11, output_windows, output_xml, output_shmem; + +extern int lstopo_shmem_adopt(const char *input, hwloc_topology_t *topologyp); struct draw_methods { int (*declare_color) (struct lstopo_output *loutput, struct lstopo_color *lcolor); diff 
--git a/utils/lstopo/test-lstopo-shmem.sh.in b/utils/lstopo/test-lstopo-shmem.sh.in new file mode 100644 index 0000000000..14e6d64720 --- /dev/null +++ b/utils/lstopo/test-lstopo-shmem.sh.in @@ -0,0 +1,60 @@ +#!/bin/sh +#-*-sh-*- + +# Copyright © 2009-2019 Inria. All rights reserved. +# See COPYING in top-level directory. +# + +HWLOC_top_builddir="@HWLOC_top_builddir@" +builddir="$HWLOC_top_builddir/utils/lstopo" +ls="$builddir/lstopo-no-graphics" + +HWLOC_PLUGINS_PATH=${HWLOC_top_builddir}/hwloc +export HWLOC_PLUGINS_PATH + +HWLOC_DEBUG_CHECK=1 +export HWLOC_DEBUG_CHECK + +: ${TMPDIR=/tmp} +{ + tmp=` + (umask 077 && mktemp -d "$TMPDIR/fooXXXXXX") 2>/dev/null + ` && + test -n "$tmp" && test -d "$tmp" +} || { + tmp=$TMPDIR/foo$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || exit $? +echo "Outputs will be sent to $tmp" + +unset DISPLAY + +echo "Exporting to $tmp/test.shmem ..." +$ls $tmp/test.shmem > $tmp/test.shmem.out 2> $tmp/test.shmem.err +return=$? +cat $tmp/test.shmem.out $tmp/test.shmem.err +if test $return != 0; then + if grep "shmem topology not supported" $tmp/test.shmem.err >/dev/null \ + || grep "Failed to find a shmem topology mmap address" $tmp/test.shmem.err >/dev/null \ + || grep "Failed to export shmem topology, memory range is busy" $tmp/test.shmem.err >/dev/null; then + echo "Expected error during export, skipping this test" + exit 77 + fi + echo "Failed" + exit 1 +fi + +echo "Importing from $tmp/test.shmem ..." +$ls -i $tmp/test.shmem - > $tmp/test.shmem.out2 2> $tmp/test.shmem.err2 +return=$? 
+cat $tmp/test.shmem.out2 $tmp/test.shmem.err2 +if test $return != 0; then + if grep "Failed to adopt shmem topology, memory range is busy" $tmp/test.shmem.err2 >/dev/null; then + echo "Expected error during import, skipping this test" + exit 77 + fi + echo "Failed" + exit 1 +fi + +rm -rf "$tmp" diff --git a/utils/lstopo/test-lstopo.sh.in b/utils/lstopo/test-lstopo.sh.in index fbe15f5f6b..935319afe1 100644 --- a/utils/lstopo/test-lstopo.sh.in +++ b/utils/lstopo/test-lstopo.sh.in @@ -84,9 +84,9 @@ $ls $tmp/test.svg echo "** XML output in $tmp/test.xml ..." $ls $tmp/test.xml -echo "** Minimalistic XML output in test.xml ..." +echo "** Minimalistic XML output in $tmp/test.mini.xml ..." HWLOC_LIBXML_EXPORT=0 $ls $tmp/test.mini.xml -echo "** XMLv1 output in test.v1.xml ..." +echo "** XMLv1 output in $tmp/test.v1.xml ..." $ls --export-xml-flags 1 $tmp/test.v1.xml file="$tmp/test-lstopo.output"