diff --git a/doc/manual/source/profiling.rst b/doc/manual/source/profiling.rst index e7bc205f4..c98cfabea 100644 --- a/doc/manual/source/profiling.rst +++ b/doc/manual/source/profiling.rst @@ -1043,13 +1043,21 @@ project configuration. To use the tracer, call cmake directly from a clean CMake .. sourcecode:: console - ~/ns-3-dev/cmake-cache$ cmake .. --profiling-format=google-trace --profiling-output=trace.log + ~/ns-3-dev/cmake-cache$ cmake .. --profiling-format=google-trace --profiling-output=../cmake_performance_trace.log + + +Or using the ns3 wrapper: + +.. sourcecode:: console + + ~/ns-3-dev$ ./ns3 configure --trace-performance + .. _Perfetto UI: https://ui.perfetto.dev/ -The ``trace.log`` file will be generated, and can be visualized using the ``about:tracing`` panel -available in Chromium-based browsers or compatible trace viewer such as -`Perfetto UI`_. +A ``cmake_performance_trace.log`` file will be generated in the ns-3-dev directory. +The tracing results can be visualized using the ``about:tracing`` panel available +in Chromium-based browsers or a compatible trace viewer such as `Perfetto UI`_. After opening the trace file, select the traced process and click on any of the blocks to inspect the different stacks and find hotspots. @@ -1063,9 +1071,25 @@ to identify hotspots and focus on trying to optimize what matters most. .. _issue #588: https://gitlab.com/nsnam/ns-3-dev/-/issues/588 The trace below was generated during the discussion of `issue #588`_, -while using a HDD, which adds significant overhead to the CMake -configuration step. +while investigating the long configuration times, especially when using HDDs. + +The single largest contributor was CMake's ``configure_file``, used to keep +up-to-date copies of headers in the output directory. .. image:: figures/perfetto-trace-cmake.png +.. 
_MR911: https://gitlab.com/nsnam/ns-3-dev/-/merge_requests/911 + +In `MR911`_, alternatives such as stub headers that include the original header +files, keeping them in their respective modules, and symlinking headers to the +output directory were used to reduce the configuration overhead. + +Note: when testing I/O bottlenecks, you may want to drop filesystem caches; +otherwise, the cache may hide the issues. On Linux, the caches can be cleared +using the following command: + +.. sourcecode:: console + + ~/ns-3-dev$ sudo sysctl vm.drop_caches=3 + diff --git a/ns3 b/ns3 index ad0134cef..ffd77ae52 100755 --- a/ns3 +++ b/ns3 @@ -171,6 +171,9 @@ def parse_args(argv): parser_configure.add_argument('--quiet', help="Don't print task lines, i.e. messages saying which tasks are being executed.", action="store_true", default=None, dest="configure_quiet") + parser_configure.add_argument('--trace-performance', + help="Generate a performance trace log for the CMake configuration", + action="store_true", default=None, dest="trace_cmake_perf") parser_clean = sub_parser.add_parser('clean', help='Removes files created by ns3') parser_clean.add_argument('clean', action="store_true", default=False) @@ -565,8 +568,13 @@ def configure_cmake(cmake, args, current_cmake_cache_folder, current_cmake_gener # Try to set specified generator (will probably fail if there is an old cache) if args.G: - cmake_args.append("-G") - cmake_args.append(args.G) + cmake_args.extend(["-G", args.G]) + + if args.trace_cmake_perf: + cmake_performance_trace = os.path.join(os.path.relpath(ns3_path, current_cmake_cache_folder), + "cmake_performance_trace.log") + cmake_args.extend(["--profiling-format=google-trace", + "--profiling-output="+cmake_performance_trace]) # Append CMake flags passed using the -- separator cmake_args.extend(args.program_args) diff --git a/utils/tests/test-ns3.py b/utils/tests/test-ns3.py index 1f94427f4..4eb3be7c3 100644 --- a/utils/tests/test-ns3.py +++ b/utils/tests/test-ns3.py @@ 
-1057,6 +1057,17 @@ class NS3ConfigureTestCase(NS3BaseTestCase): shutil.rmtree("contrib/calibre", ignore_errors=True) + def test_17_CMakePerformanceTracing(self): + """! + Test if CMake performance tracing works and produces the + cmake_performance_trace.log file + @return None + """ + return_code, stdout, stderr = run_ns3("configure --trace-performance") + self.assertEqual(return_code, 0) + self.assertIn("--profiling-format=google-trace --profiling-output=../cmake_performance_trace.log", stdout) + self.assertTrue(os.path.exists(os.path.join(ns3_path, "cmake_performance_trace.log"))) + class NS3BuildBaseTestCase(NS3BaseTestCase): """!