diff --git a/bazel/cc_toolchains/clang_cc_toolchain_config.bzl b/bazel/cc_toolchains/clang_cc_toolchain_config.bzl index 748050650c9c6..be3664b698cc5 100644 --- a/bazel/cc_toolchains/clang_cc_toolchain_config.bzl +++ b/bazel/cc_toolchains/clang_cc_toolchain_config.bzl @@ -643,22 +643,6 @@ def _impl(ctx): ], ) - # A feature that enables poisoning of value stores to detect use after - # potential reallocation bugs. - # - # TODO: Remove this and leave poisoning always on once these bugs are - # fixed. - poison_value_stores = feature( - name = "poison_value_stores", - requires = [feature_set(["asan"])], - flag_sets = [flag_set( - actions = all_compile_actions, - flag_groups = [flag_group(flags = [ - "-DCARBON_POISON_VALUE_STORES=1", - ])], - )], - ) - fuzzer = feature( name = "fuzzer", flag_sets = [ @@ -1161,7 +1145,6 @@ def _impl(ctx): asan, asan_min_size, enable_in_fastbuild, - poison_value_stores, fuzzer, layering_check, module_maps, diff --git a/bazel/check_deps/check_non_test_cc_deps.py b/bazel/check_deps/check_non_test_cc_deps.py index a0c75f9e4abb9..85d72b91e25b5 100644 --- a/bazel/check_deps/check_non_test_cc_deps.py +++ b/bazel/check_deps/check_non_test_cc_deps.py @@ -81,9 +81,9 @@ # exist. Specially diagnose them to try to provide a more helpful # message. if repo in ( - "@google_benchmark", - "@abseil-cpp", - "@googletest", + "@@google_benchmark+", + "@@abseil-cpp+", + "@@googletest+", ): sys.exit("ERROR: dependency only allowed in test code: %s" % dep) diff --git a/testing/file_test/file_test_base.cpp b/testing/file_test/file_test_base.cpp index 57da501ae61e8..41aba44c9680a 100644 --- a/testing/file_test/file_test_base.cpp +++ b/testing/file_test/file_test_base.cpp @@ -386,6 +386,9 @@ static auto RunSingleTest(FileTestInfo& test, bool single_threaded, if (absl::GetFlag(FLAGS_dump_output)) { std::unique_lock lock(output_mutex); llvm::errs() << "\n--- Dumping: " << test.test_name << "\n\n"; + } else if (absl::GetFlag(FLAGS_threads) == 1) { + std::unique_lock lock(output_mutex); + llvm::errs() << "\nTEST " << test.test_name << " "; } // Load expected output. @@ -544,12 +547,12 @@ static auto Main(int argc, char** argv) -> ErrorOr { Carbon::InitLLVM init_llvm(argc, argv); testing::InitGoogleTest(&argc, argv); - auto args = absl::ParseCommandLine(argc, argv); + auto positional_args = absl::ParseCommandLine(argc, argv); - if (args.size() > 1) { + if (positional_args.size() > 1) { ErrorBuilder b; b << "Unexpected arguments:"; - for (char* arg : llvm::ArrayRef(args).drop_front()) { + for (char* arg : llvm::ArrayRef(positional_args).drop_front()) { b << " " << FormatEscaped(arg); } return b; diff --git a/testing/file_test/file_test_base.h b/testing/file_test/file_test_base.h index 540e2365ebf52..a6668894de2d3 100644 --- a/testing/file_test/file_test_base.h +++ b/testing/file_test/file_test_base.h @@ -83,6 +83,11 @@ class FileTestBase { return {}; } + // Returns additional arguments to add to the Run call, if any. + virtual auto GetAdditionalArgs() const -> llvm::SmallVector { + return {}; + } + // Returns a regex to match the default file when a line may not be present. // May return nullptr if unused. If GetLineNumberReplacements returns an entry // with has_file=false, this is required. diff --git a/testing/file_test/run_test.cpp b/testing/file_test/run_test.cpp index 6015784ca711f..3d6a5997cfd80 100644 --- a/testing/file_test/run_test.cpp +++ b/testing/file_test/run_test.cpp @@ -134,16 +134,24 @@ auto RunTestFile(const FileTestBase& test_base, bool dump_output, } } + // The subclass can inject additional arguments unconditionally. + auto additional_args = test_base.GetAdditionalArgs(); + // Convert the arguments to StringRef and const char* to match the // expectations of PrettyStackTraceProgram and Run. llvm::SmallVector test_args_ref; llvm::SmallVector test_argv_for_stack_trace; - test_args_ref.reserve(test_file.test_args.size()); - test_argv_for_stack_trace.reserve(test_file.test_args.size() + 1); + test_args_ref.reserve(test_file.test_args.size() + additional_args.size()); + test_argv_for_stack_trace.reserve(test_file.test_args.size() + + additional_args.size() + 1); for (const auto& arg : test_file.test_args) { test_args_ref.push_back(arg); test_argv_for_stack_trace.push_back(arg.c_str()); } + for (const auto& arg : additional_args) { + test_args_ref.push_back(arg); + test_argv_for_stack_trace.push_back(arg.c_str()); + } // Add a trailing null so that this is a proper argv. test_argv_for_stack_trace.push_back(nullptr); diff --git a/toolchain/base/BUILD b/toolchain/base/BUILD index 5255190b16b98..f76d7d30c38a3 100644 --- a/toolchain/base/BUILD +++ b/toolchain/base/BUILD @@ -86,6 +86,7 @@ cc_library( cc_library( name = "value_store", + srcs = ["value_store.cpp"], hdrs = ["value_store.h"], deps = [ ":mem_usage", diff --git a/toolchain/base/value_store.cpp b/toolchain/base/value_store.cpp new file mode 100644 index 0000000000000..3a150ad07a9bc --- /dev/null +++ b/toolchain/base/value_store.cpp @@ -0,0 +1,106 @@ +// Part of the Carbon Language project, under the Apache License v2.0 with LLVM +// Exceptions. See /LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "toolchain/base/value_store.h" + +#include "common/ostream.h" + +#ifdef LLVM_ADDRESS_SANITIZER_BUILD + +namespace Carbon { + +namespace Internal { + +// Gets the global (thread-safe) decision to print poison logs. +// +// Initially this is called on each thread (with the same `val`) when deciding +// to print logs or not, and the first such thread to win which sets the static +// global bool. Further calls (which do not need to pass a value) just query and +// get back that `Condition`. +static auto GetShouldPrint(std::optional val = std::nullopt) -> bool { + static auto should_print = *val; + return should_print; +} + +struct Condition { + std::string label; + uint64_t counter; +}; + +static auto ParseAbortCondition(llvm::StringRef condition) -> Condition { + if (condition.empty()) { + return {.counter = static_cast(-1)}; + } + + auto [label, counter_str] = condition.split(':'); + if (counter_str.empty()) { + llvm::errs() + << "ERROR: --poison_abort condition should be 'label:counter'\n"; + std::exit(1); + } + uint64_t counter; + // Note: getAsInteger returns false on success @_@. + if (counter_str.getAsInteger(0, counter)) { + llvm::errs() << "ERROR: counter could not be parsed as an integer\n"; + llvm::errs() + << "ERROR: --poison_abort condition should be 'label:counter'\n"; + llvm::errs() << " NOTE: found label:'" << label << "' counter:'" + << counter_str << "'\n"; + std::exit(1); + } + return {.label = std::string(label), .counter = counter}; +} + +// Gets the global (thread-safe) abort Condition. +// +// Initially this is called on each thread (with the same `condition_str`) when +// setting the abort condition, and the first such thread to win which sets the +// static global `Condition`. Further calls without a string input just query +// and get back that `Condition`. +static auto GetAbortCondition(llvm::StringRef condition_str = "") -> Condition { + static Condition condition = ParseAbortCondition(condition_str); + return condition; +} + +auto LogPoison(llvm::StringRef label, int element) -> void { + if (!GetShouldPrint()) { + return; + } + static uint64_t counter = 0; + if (element < 0) { + llvm::errs() << "++ " << label << " PoisonAll (" << label << ":" << counter + << ")\n"; + } else { + llvm::errs() << "++ " << label << " PoisonElement " << element << " (" + << label << ":" << counter << ")\n"; + } + auto condition = GetAbortCondition(); + if (counter >= condition.counter && label == condition.label) { + llvm::errs() << "*** Aborting on poison event. Stack trace below.\n"; + std::abort(); + } + ++counter; +} + +auto LogUnpoison(llvm::StringRef label, int element) -> void { + if (!GetShouldPrint()) { + return; + } + if (element < 0) { + llvm::errs() << "-- " << label << " UnpoisonAll\n"; + } else { + llvm::errs() << "-- " << label << " UnpoisonElement " << element << '\n'; + } +} + +} // namespace Internal + +auto SetPoisonVerbose(bool v) -> void { Internal::GetShouldPrint(v); } +auto SetPoisonAbortCondition(llvm::StringRef s) -> void { + Internal::GetAbortCondition(s); +} + +} // namespace Carbon + +#endif diff --git a/toolchain/base/value_store.h b/toolchain/base/value_store.h index 015bb9bb32296..c16001e9c9797 100644 --- a/toolchain/base/value_store.h +++ b/toolchain/base/value_store.h @@ -23,21 +23,27 @@ namespace Carbon { namespace Internal { - // Used as a parent class for non-printable types. This is just for // std::conditional, not as an API. class ValueStoreNotPrintable {}; } // namespace Internal -// Setup our compile time condition controlling poisoning of value stores. This -// is set to one by the Bazel flag `--features=poison_value_stores`. -// -// TODO: Eventually, this will always enabled when ASan is enabled, but we can't -// do that until we clean up all of the latent bugs. -#ifndef CARBON_POISON_VALUE_STORES -#define CARBON_POISON_VALUE_STORES 0 -#elif !LLVM_ADDRESS_SANITIZER_BUILD -#error "CARBON_POISON_VALUE_STORES requires address sanitizer" +#ifdef LLVM_ADDRESS_SANITIZER_BUILD +namespace Internal { +// Logs the poison event to stderr if `SetPoisonVerbose()` was set to true. A +// negative `element` indicates the entire store was poisoned. +auto LogPoison(llvm::StringRef label, int element) -> void; +// Logs the unpoison event to stderr if `SetPoisonVerbose()` was set to true. A +// negative `element` indicates the entire store was unpoisoned. +auto LogUnpoison(llvm::StringRef label, int element) -> void; +} // namespace Internal + +// Control whether to print verbose ASAN poisoning logs showing each poison +// event in the value stores. See --poison_verbose. +auto SetPoisonVerbose(bool v) -> void; +// Set the condition on which an ASAN poison event will abort the process and +// dump a stack trace. See --poison_abort. +auto SetPoisonAbortCondition(llvm::StringRef s) -> void; #endif // A simple wrapper for accumulating values, providing IDs to later retrieve the @@ -66,7 +72,7 @@ class ValueStore ValueStore() = default; ValueStore(ValueStore&& other) noexcept : values_((other.UnpoisonAll(), std::move(other.values_))) -#if CARBON_POISON_VALUE_STORES +#ifdef LLVM_ADDRESS_SANITIZER_BUILD , all_poisoned_(false) #endif @@ -77,7 +83,7 @@ class ValueStore UnpoisonAll(); other.UnpoisonAll(); values_ = std::move(other.values_); -#if CARBON_POISON_VALUE_STORES +#ifdef LLVM_ADDRESS_SANITIZER_BUILD all_poisoned_ = false; #endif PoisonAll(); @@ -98,15 +104,13 @@ class ValueStore // Unpoison everything if the push will reallocate, in order to allow the // vector to make a copy of the elements. UnpoisonAll(); - } else { - PoisonAll(); } values_.push_back(std::move(value)); - if (realloc) { - PoisonAll(); - } else { + if (!PoisonAll()) { + // If we did not realloc, and the store was already poisoned, the + // PoisonAll did nothing. So we poison the new element. PoisonElement(id.index); } @@ -183,18 +187,22 @@ class ValueStore // Poison the entire contents of the value store. This is used to detect cases // where references to elements in a value store are used across calls that // might modify the store. - auto PoisonAll() const -> void { -#if CARBON_POISON_VALUE_STORES + auto PoisonAll() const -> bool { +#ifdef LLVM_ADDRESS_SANITIZER_BUILD if (!all_poisoned_) { + Internal::LogPoison(IdT::Label, -1); __asan_poison_memory_region(values_.data(), values_.size() * sizeof(values_[0])); all_poisoned_ = true; + return true; } #endif + return false; } // Unpoison the entire contents of the value store. auto UnpoisonAll() const -> void { -#if CARBON_POISON_VALUE_STORES +#ifdef LLVM_ADDRESS_SANITIZER_BUILD + Internal::LogUnpoison(IdT::Label, -1); __asan_unpoison_memory_region(values_.data(), values_.size() * sizeof(values_[0])); all_poisoned_ = false; @@ -202,13 +210,15 @@ class ValueStore } // Poison a single element. auto PoisonElement([[maybe_unused]] int element) const -> void { -#if CARBON_POISON_VALUE_STORES +#ifdef LLVM_ADDRESS_SANITIZER_BUILD + Internal::LogPoison(IdT::Label, element); __asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0])); #endif } // Unpoison a single element. auto UnpoisonElement([[maybe_unused]] int element) const -> void { -#if CARBON_POISON_VALUE_STORES +#ifdef LLVM_ADDRESS_SANITIZER_BUILD + Internal::LogUnpoison(IdT::Label, element); __asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0])); all_poisoned_ = false; #endif @@ -218,7 +228,7 @@ class ValueStore // stack, while this does make File smaller. llvm::SmallVector, 0> values_; -#if CARBON_POISON_VALUE_STORES +#ifdef LLVM_ADDRESS_SANITIZER_BUILD // Whether the vector is currently fully poisoned. // // We use this to avoid repeated re-poisoning of the entire store. Doing so is diff --git a/toolchain/check/class.cpp b/toolchain/check/class.cpp index fc1d9c63f2fad..85fb0442d807e 100644 --- a/toolchain/check/class.cpp +++ b/toolchain/check/class.cpp @@ -235,7 +235,9 @@ static auto CheckCompleteClassType( } if (class_info.is_dynamic) { - class_info.vtable_id = BuildVtable( + // This function imports, which can invalidate `class_info` and other + // pointers into value stores. + context.classes().Get(class_id).vtable_id = BuildVtable( context, node_id, defining_vptr ? SemIR::InstId::None : base_class_info->vtable_id, vtable_contents); diff --git a/toolchain/check/handle_impl.cpp b/toolchain/check/handle_impl.cpp index 2bf89edefe4b5..b28d2b1746443 100644 --- a/toolchain/check/handle_impl.cpp +++ b/toolchain/check/handle_impl.cpp @@ -566,6 +566,8 @@ auto HandleParseNode(Context& context, Parse::ImplDefinitionStartId node_id) impl_decl_id, impl_info.scope_id, context.generics().GetSelfSpecific(impl_info.generic_id)); StartGenericDefinition(context, impl_info.generic_id); + // This function imports, which can invalidate `impl_info` and other pointers + // into value stores. ImplWitnessStartDefinition(context, impl_info); context.inst_block_stack().Push(); context.node_stack().Push(node_id, impl_id); @@ -579,7 +581,8 @@ auto HandleParseNode(Context& context, Parse::ImplDefinitionStartId node_id) // // We may need to track a list of instruction blocks here, as we do for a // function. - impl_info.body_block_id = context.inst_block_stack().PeekOrAdd(); + context.impls().Get(impl_id).body_block_id = + context.inst_block_stack().PeekOrAdd(); return true; } diff --git a/toolchain/check/impl.cpp b/toolchain/check/impl.cpp index 1b984b05448f1..252d9f29385d0 100644 --- a/toolchain/check/impl.cpp +++ b/toolchain/check/impl.cpp @@ -98,7 +98,9 @@ auto ImplWitnessForDeclaration(Context& context, const SemIR::Impl& impl, context.generics().GetSelfSpecific(impl.generic_id), has_definition); } -auto ImplWitnessStartDefinition(Context& context, SemIR::Impl& impl) -> void { +// Note: `impl` is copied to avoid a pointer being invalidated when this +// function imports. +auto ImplWitnessStartDefinition(Context& context, SemIR::Impl impl) -> void { CARBON_CHECK(impl.is_being_defined()); CARBON_CHECK(impl.witness_id.has_value()); if (impl.witness_id == SemIR::ErrorInst::InstId) { @@ -122,6 +124,7 @@ auto ImplWitnessStartDefinition(Context& context, SemIR::Impl& impl) -> void { return; } + // This function imports, which can invalidate pointers into value stores. AllocateFacetTypeImplWitness(context, impl.interface.interface_id, witness_table.elements_id); witness_block = context.inst_blocks().GetMutable(witness_table.elements_id); @@ -166,7 +169,7 @@ auto ImplWitnessStartDefinition(Context& context, SemIR::Impl& impl) -> void { auto FinishImplWitness(Context& context, SemIR::ImplId impl_id) -> void { // Make a copy of the impl. We're going to reference it a lot, and `impl`s // could get invalidated by some of the things we do. - const auto impl = context.impls().Get(impl_id); + auto impl = context.impls().Get(impl_id); CARBON_CHECK(impl.is_being_defined()); CARBON_CHECK(impl.witness_id.has_value()); @@ -183,6 +186,7 @@ auto FinishImplWitness(Context& context, SemIR::ImplId impl_id) -> void { const auto& interface = context.interfaces().Get(impl.interface.interface_id); auto assoc_entities = context.inst_blocks().Get(interface.associated_entities_id); + auto interface_name_id = interface.name_id; llvm::SmallVector used_decl_ids; for (auto [assoc_entity, witness_value] : @@ -204,6 +208,8 @@ auto FinishImplWitness(Context& context, SemIR::ImplId impl_id) -> void { CARBON_FATAL("Unexpected type: {0}", type_inst); } auto& fn = context.functions().Get(fn_type->function_id); + // This function imports, which can invalidate `interface` and other + // pointers into value stores. auto lookup_result = LookupNameInExactScope(context, SemIR::LocId(decl_id), fn.name_id, impl.scope_id, impl_scope); @@ -219,7 +225,7 @@ auto FinishImplWitness(Context& context, SemIR::ImplId impl_id) -> void { SemIR::NameId, SemIR::NameId); auto builder = context.emitter().Build(impl.definition_id, ImplMissingFunction, - fn.name_id, interface.name_id); + fn.name_id, interface_name_id); NoteAssociatedFunction(context, builder, fn_type->function_id); builder.Emit(); diff --git a/toolchain/check/impl.h b/toolchain/check/impl.h index 586948916e49f..a4d90159f19b4 100644 --- a/toolchain/check/impl.h +++ b/toolchain/check/impl.h @@ -18,7 +18,7 @@ auto ImplWitnessForDeclaration(Context& context, const SemIR::Impl& impl, bool has_definition) -> SemIR::InstId; // Update `impl`'s witness at the start of a definition. -auto ImplWitnessStartDefinition(Context& context, SemIR::Impl& impl) -> void; +auto ImplWitnessStartDefinition(Context& context, SemIR::Impl impl) -> void; // Adds the function members to the witness for `impl`. auto FinishImplWitness(Context& context, SemIR::ImplId impl_id) -> void; diff --git a/toolchain/check/type_completion.cpp b/toolchain/check/type_completion.cpp index 0d136b53ce2fb..478210efff918 100644 --- a/toolchain/check/type_completion.cpp +++ b/toolchain/check/type_completion.cpp @@ -265,8 +265,8 @@ auto TypeCompleter::AddNestedIncompleteTypes(SemIR::Inst type_inst) -> bool { break; } case CARBON_KIND(SemIR::ClassType inst): { - auto& class_info = context_->classes().Get(inst.class_id); - if (!class_info.is_complete()) { + auto* class_info = &context_->classes().Get(inst.class_id); + if (!class_info->is_complete()) { if (diagnoser_) { auto builder = diagnoser_(); NoteIncompleteClass(*context_, inst.class_id, builder); @@ -275,14 +275,17 @@ auto TypeCompleter::AddNestedIncompleteTypes(SemIR::Inst type_inst) -> bool { return false; } if (inst.specific_id.has_value()) { + // This function runs eval, which can import and invalidate `class_info` + // and other pointers into value stores. ResolveSpecificDefinition(*context_, loc_id_, inst.specific_id); + class_info = &context_->classes().Get(inst.class_id); } if (auto adapted_type_id = - class_info.GetAdaptedType(context_->sem_ir(), inst.specific_id); + class_info->GetAdaptedType(context_->sem_ir(), inst.specific_id); adapted_type_id.has_value()) { Push(adapted_type_id); } else { - Push(class_info.GetObjectRepr(context_->sem_ir(), inst.specific_id)); + Push(class_info->GetObjectRepr(context_->sem_ir(), inst.specific_id)); } break; } diff --git a/toolchain/docs/adding_features.md b/toolchain/docs/adding_features.md index 3ca8481e93826..8f15290132b5b 100644 --- a/toolchain/docs/adding_features.md +++ b/toolchain/docs/adding_features.md @@ -22,9 +22,13 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Updating tests](#updating-tests) - [Reviewing test deltas](#reviewing-test-deltas) - [Minimal Core prelude](#minimal-core-prelude) + - [Debugging ASAN Poisoning](#debugging-asan-poisoning) + - [Non-determinism in the poison log](#non-determinism-in-the-poison-log) - [Verbose output](#verbose-output) - [Stack traces](#stack-traces) + - [ASAN stack trace quality](#asan-stack-trace-quality) - [Dumping objects in interactive debuggers](#dumping-objects-in-interactive-debuggers) + - [ASAN error: `malloc: nano zone abandoned`](#asan-error-malloc-nano-zone-abandoned) @@ -508,6 +512,165 @@ We have a set of minimal `Core` preludes for testing different compiler feature areas in `//toolchain/testing/min_prelude/`. Each file begins with the line `package Core library "prelude";` to make it provide a prelude. +### Debugging ASAN Poisoning + +If a pointer is held across a ValueStore being modified and then used afterward +it may have been invalidated and this is a bug. + +Our default build enables ASAN, and with ASAN enabled we look for these bugs by +poisoning ValueStores on modification. If a test fails due to ValueStore +poisoning, it will give an ASAN stack trace that says `use-after-poison` and +looks like this: + +``` +==12==ERROR: AddressSanitizer: use-after-poison on address 0x50800020feec at pc 0x55f9c9777abe bp 0x7fff51624df0 sp 0x7fff51624de8 +WRITE of size 4 at 0x50800020feec thread T0 + #0 0x55f9c9777abd in Carbon::Check::HandleParseNode(Carbon::Check::Context&, Carbon::Parse::NodeIdForKind) /proc/self/cwd/toolchain/check/handle_impl.cpp:584:27 + #1 0x55f9c9717b1e in Carbon::Check::CheckUnit::ProcessNodeIds() /proc/self/cwd/./toolchain/parse/node_kind.def:357:1 + #2 0x55f9c9712c0a in Carbon::Check::CheckUnit::Run() /proc/self/cwd/toolchain/check/check_unit.cpp:91:8 +``` + +Debugging use-after-poison is a little tricky, as it takes some work to +determine how the poisoning occurred. Here we will look at how to do this +debugging. + +Some suggested aliases for the common commands in this section: + +```sh +alias ptestall='bazel test //toolchain/testing:file_test --test_arg=--threads=1' +alias ptestfile='bazel run //toolchain/testing:file_test -- --dump_output --poison_verbose --file_tests' +``` + +If the test failed in the usual testing configuration, you will get a stack +trace but an ASAN stack does not display which test failed. To determine this, +run the tests again with `--threads=1`, which will print the name of each test +before running it: + +```sh +bazel test //toolchain/testing:file_test --test_arg=--threads=1 +``` + +In the failure log, it should now be clear which test failed, as it will be the +last test name printed before the ASAN stack trace, like this: + +``` +TEST toolchain/check/testdata/poisoned.carbon ================================================================= +==12==ERROR: AddressSanitizer: use-after-poison on address 0x50800020feec at pc 0x55f9c9777abe bp 0x7fff51624df0 sp 0x7fff51624de8 +WRITE of size 4 at 0x50800020feec thread T0 + #0 0x55f9c9777abd in Carbon::Check::HandleParseNode(Carbon::Check::Context&, Carbon::Parse::NodeIdForKind) /proc/self/cwd/toolchain/check/handle_impl.cpp:584:27 + #1 0x55f9c9717b1e in Carbon::Check::CheckUnit::ProcessNodeIds() /proc/self/cwd/./toolchain/parse/node_kind.def:357:1 + #2 0x55f9c9712c0a in Carbon::Check::CheckUnit::Run() /proc/self/cwd/toolchain/check/check_unit.cpp:91:8 +``` + +The ASAN stack trace printed on a use-after-poison includes two sections. To +debug, we need to get information from each stack trace section: + +1. The use-after-poison stack, which shows the invalid use of a pointer. +2. The allocation stack, which shows which `ValueStore` the pointer is into. + For example, if the invalid pointer read was to an `Impl`, the allocation + stack will name `ValueStore` in the first few frames below the + `llvm::SmallVector` frames. + +Now we know which pointer is invalid and where from (1) above. But we don't know +what invalidated it yet. To do that we need to get a stack trace for the +poisoning event. + +We do know which value store type was poisoned from (2) above. Run the +individual test that failed with `--poison_verbose` to list all poison events. +We only do this for a single test at a time because it prints a _lot_ and will +be too slow to run all the tests. + +```sh +bazel run //toolchain/testing:file_test -- --dump_output --poison_verbose --file_tests -- --dump_output --poison_verbose --file_tests path/to/test.carbon +``` + +This will print a lot of `Poison` and `Unpoison` log messages and eventually +crash again on the use-after-poison event. We use the information from (2) to +look up through the logs and find the last `Poison` event for the type of value +store from (2). For example, if the allocation stack showed +`ValueStore` then we'd look for the last `Poison` event on `impl`. + +For example, if the log ended at the use-after-poison crash as follows, then we +would be interested in the `++ impl PoisonAll (impl:911)` line: + +``` +-- inst UnpoisonElement 18 +-- interface UnpoisonElement 0 +-- inst_block UnpoisonElement 10 +++ facet_type PoisonAll (facet_type:910) +++ impl PoisonAll (impl:911) +++ interface PoisonAll (interface:912) +-- inst UnpoisonElement 31 +-- inst_block UnpoisonElement 5 +-- inst_block UnpoisonElement 5 +-- inst_block UnpoisonElement 5 +-- interface UnpoisonElement 0 +-- inst_block UnpoisonElement 10 +-- inst UnpoisonElement 34 +++ inst_block PoisonAll (inst_block:913) +================================================================= +==554912==ERROR: AddressSanitizer: use-after-poison on address 0x50800006c06c at pc 0x55ee267f0abe bp 0x7fff197bdbf0 sp 0x7fff197bdbe8 +WRITE of size 4 at 0x50800006c06c thread T0 + #0 0x55ee267f0abd in Carbon::Check::HandleParseNode(Carbon::Check::Context&, Carbon::Parse::NodeIdForKind) /proc/self/cwd/toolchain/check/handle_impl.cpp:584:27 +``` + +From the poison log, we get the label and counter value of interest. In the +example above that is `impl:911`, and we can use that with `--poison_abort` to +get the stack trace of the poisoning event, in order to find out where the +pointer was invalidated. + +```sh +bazel run //toolchain/testing:file_test -- --dump_output --poison_verbose --file_tests -- --dump_output --poison_verbose --file_tests path/to/test.carbon --poison_abort=impl:911 +``` + +If everything goes well, it will run up to this poison event and dump a stack +trace showing the source of the pointer invalidation: + +``` +-- interface UnpoisonElement 0 +-- inst UnpoisonElement 18 +-- interface UnpoisonElement 0 +-- inst_block UnpoisonElement 10 +++ facet_type PoisonAll (facet_type:910) +++ impl PoisonAll (impl:911) +*** Stopping on poison event. Stack trace below. +... + #0 0x000055d7bb2b623a ___interceptor_backtrace (bazel-bin/toolchain/testing/file_test+0xbe9023a) + #1 0x000055d7c7c74d1d llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /proc/self/cwd/external/+llvm_project+llvm-project/llvm/lib/Support/Unix/Signals.inc:804:13 +... + 13 0x000055d7bd409b0a Invalidate /proc/self/cwd/./toolchain/sem_ir/impl.h:186:39 +#14 0x000055d7bd409b0a Carbon::Check::LoadImportRef(Carbon::Check::Context&, Carbon::SemIR::InstId) /proc/self/cwd/toolchain/check/import_ref.cpp:3217:19 +#15 0x000055d7bd38cf54 Carbon::Check::AllocateFacetTypeImplWitness(Carbon::Check::Context&, Carbon::SemIR::InterfaceId, Carbon::SemIR::InstBlockId) /proc/self/cwd/toolchain/check/facet_type.cpp:257:21 +``` + +In this example, `AllocateFacetTypeImplWitness()` caused an import to occur, and +imports can load arbitrary things and invalidate value stores. This shows use +where we need to stop using the pointer. We document the function call that +causes the invalidation and ensure code afterward avoids reusing the invalidated +pointer. + +Then rebuild and run the test again to see if the issue was correctly resolved, +and there are no further issues, iterating as needed: + +```sh +bazel run //toolchain/testing:file_test -- --dump_output --poison_verbose --file_tests -- --dump_output --poison_verbose --file_tests path/to/test.carbon +``` + +#### Non-determinism in the poison log + +The counter in the poison log can be non-deterministic across runs, +unfortunately, due to non-determinism in our data structures such as maps, and +sorting. For example, if you used `--poison_abort=impl:911`, you might see on +the next run that the last `impl` poison event is now `impl:908`. To help deal +with this, `--poison_abort` will abort when the label matches and the counter +value is any value equal to or greater than the one you specify. So using +`--poison_abort=impl:908` would then catch the poison event whether it was +recorded as `908` or `911` in the next run. + +If a ValueStore is invalidated frequently (such as the `inst` store), this +non-determinism may make the poison stack less reliable. It may require +collecting a few poison logs to find the correct one (sorry). + ### Verbose output The `-v` flag can be passed to trace state, and should be specified before the @@ -521,6 +684,16 @@ minimal context for how the current function is reached, we use LLVM's `PrettyStackTrace` to include details about the state stack. The state stack will be above the function stack in crash output. +#### ASAN stack trace quality + +In order to get a symbolized stack trace from ASAN (which is enabled in the +default build), ensure that `llvm-symbolizer` is in your path or set the +`LLVM_SYMBOLIZER_PATH` environment variable to point to the `llvm-symbolizer` +binary. + +If the quality of the stack trace is low, it is possible to enable ASAN and +debug symbols together by building under bazel with `--config=asan -c dbg`. + ### Dumping objects in interactive debuggers We provide namespace-scoped `Dump` functions in several components, such as @@ -530,3 +703,16 @@ regarding support. Objects which inherit from `Printable` also have `Dump` member functions, but these will lack contextual information. + +### ASAN error: `malloc: nano zone abandoned` + +On MacOS when running ASAN binaries directly, you will get this error message in +stderr: + +``` +file_test(61907,0x20b351f00) malloc: nano zone abandoned due to inability to reserve vm space. +``` + +To avoid this, set `MallocNanoZone=0` in your environment. This issue is tracked +in https://github.com/google/sanitizers/issues/1666. Note that when running +binaries through bazel we set this environment variable for you. diff --git a/toolchain/driver/BUILD b/toolchain/driver/BUILD index 9913a85fe02ac..23f53d13448e8 100644 --- a/toolchain/driver/BUILD +++ b/toolchain/driver/BUILD @@ -128,6 +128,7 @@ cc_library( "//toolchain/base:pretty_stack_trace_function", "//toolchain/base:shared_value_stores", "//toolchain/base:timings", + "//toolchain/base:value_store", "//toolchain/check", "//toolchain/codegen", "//toolchain/diagnostics:diagnostic_emitter", diff --git a/toolchain/driver/compile_subcommand.cpp b/toolchain/driver/compile_subcommand.cpp index 5d4ec46cbc21e..fcc81f5213944 100644 --- a/toolchain/driver/compile_subcommand.cpp +++ b/toolchain/driver/compile_subcommand.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/ScopeExit.h" #include "toolchain/base/pretty_stack_trace_function.h" #include "toolchain/base/timings.h" +#include "toolchain/base/value_store.h" #include "toolchain/check/check.h" #include "toolchain/codegen/codegen.h" #include "toolchain/diagnostics/diagnostic_emitter.h" @@ -294,6 +295,37 @@ Whether to emit DWARF debug information. arg_b.Default(true); arg_b.Set(&include_debug_info); }); + b.AddFlag( + { + .name = "poison_verbose", + .help = R"""( +Print verbose logs of ASAN poisoning events in the value stores. Used to debug +ASAN use-after-poison crashes, and only has an effect when run with the ASAN +sanitizer. If used in a multi-thread environment, the same value should be +specified on all threads to get deterministic behaviour. +)""", + }, + [&](auto& arg_b) { + arg_b.Default(false); + arg_b.Set(&poison_verbose); + }); + b.AddStringOption( + { + .name = "poison_abort", + .value_name = "LABEL:COUNTER", + .help = R"""( +Set a condition after which the process will abort on the next ASAN poison +event. The format is LABEL:COUNTER. Once the LABEL is poisoned with a counter +value >= COUNTER, the process will abort and print the stack trace of the +ASAN poisoning event. Note that the poison counter can vary slightly from run to +run, so you can use a smaller number to catch the ASAN poison event that you +want. The possible LABEL and COUNTER values are printed when --poison_verbose is +used. Used to debug ASAN use-after-poison crashes, and only has an effect when +run with the ASAN sanitizer. If used in a multi-thread environment, the same +value should be specified on all threads to get deterministic behaviour. +)""", + }, + [&](auto& arg_b) { arg_b.Set(&poison_abort); }); } static constexpr CommandLine::CommandInfo SubcommandInfo = { @@ -856,6 +888,11 @@ auto CompileSubcommand::Run(DriverEnv& driver_env) -> DriverResult { return {.success = false}; } +#ifdef LLVM_ADDRESS_SANITIZER_BUILD + SetPoisonVerbose(options_.poison_verbose); + SetPoisonAbortCondition(options_.poison_abort); +#endif + // Find the files comprising the prelude if we are importing it. // TODO: Replace this with a search for library api files in a // package-specific search path based on the library name. diff --git a/toolchain/driver/compile_subcommand.h b/toolchain/driver/compile_subcommand.h index 2c5e993542fab..86fe760995b06 100644 --- a/toolchain/driver/compile_subcommand.h +++ b/toolchain/driver/compile_subcommand.h @@ -65,8 +65,10 @@ struct CompileOptions { bool builtin_sem_ir = false; bool prelude_import = false; bool include_debug_info = true; + bool poison_verbose = false; llvm::StringRef exclude_dump_file_prefix; + llvm::StringRef poison_abort; }; // Implements the compile subcommand of the driver. diff --git a/toolchain/testing/BUILD b/toolchain/testing/BUILD index a6f50efae75ef..2d8e25fdab95b 100644 --- a/toolchain/testing/BUILD +++ b/toolchain/testing/BUILD @@ -69,6 +69,7 @@ file_test( "//common:error", "//testing/file_test:file_test_base", "//toolchain/driver", + "@abseil-cpp//absl/flags:flag", "@abseil-cpp//absl/strings", "@llvm-project//llvm:Support", ], diff --git a/toolchain/testing/file_test.cpp b/toolchain/testing/file_test.cpp index dae2f68c02f40..2b8e171e947bc 100644 --- a/toolchain/testing/file_test.cpp +++ b/toolchain/testing/file_test.cpp @@ -11,6 +11,7 @@ #include #include +#include "absl/flags/flag.h" #include "absl/strings/str_replace.h" #include "common/error.h" #include "llvm/ADT/STLExtras.h" @@ -21,6 +22,10 @@ #include "testing/file_test/file_test_base.h" #include "toolchain/driver/driver.h" +// Flags known by the driver that are passed through to it. +ABSL_FLAG(bool, poison_verbose, false, "Passed through to the driver."); +ABSL_FLAG(std::string, poison_abort, "", "Passed through to the driver."); + namespace Carbon::Testing { namespace { @@ -44,6 +49,9 @@ class ToolchainFileTest : public FileTestBase { // Sets different default flags based on the component being tested. auto GetDefaultArgs() const -> llvm::SmallVector override; + // Returns the arguments that are being passed through to the driver. + auto GetAdditionalArgs() const -> llvm::SmallVector override; + // Generally uses the parent implementation, with special handling for lex. auto GetDefaultFileRE(llvm::ArrayRef filenames) const -> std::optional override; @@ -222,6 +230,20 @@ auto ToolchainFileTest::GetDefaultArgs() const return args; } +auto ToolchainFileTest::GetAdditionalArgs() const + -> llvm::SmallVector { + llvm::SmallVector args; + if (absl::GetFlag(FLAGS_poison_verbose)) { + args.push_back("--poison_verbose"); + } + if (auto value = absl::GetFlag(FLAGS_poison_abort); !value.empty()) { + std::string stop = "--poison_abort="; + stop += value; + args.push_back(std::move(stop)); + } + return args; +} + auto ToolchainFileTest::GetDefaultFileRE( llvm::ArrayRef filenames) const -> std::optional { if (component_ == "lex") {