Skip to content

Fix ASAN poison bugs, add debugging infrastructure for finding them, and enable poisoning with ASAN by default #5529

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions bazel/cc_toolchains/clang_cc_toolchain_config.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -643,22 +643,6 @@ def _impl(ctx):
],
)

# A feature that enables poisoning of value stores to detect use after
# potential reallocation bugs.
#
# TODO: Remove this and leave poisoning always on once these bugs are
# fixed.
poison_value_stores = feature(
name = "poison_value_stores",
requires = [feature_set(["asan"])],
flag_sets = [flag_set(
actions = all_compile_actions,
flag_groups = [flag_group(flags = [
"-DCARBON_POISON_VALUE_STORES=1",
])],
)],
)

fuzzer = feature(
name = "fuzzer",
flag_sets = [
Expand Down Expand Up @@ -1161,7 +1145,6 @@ def _impl(ctx):
asan,
asan_min_size,
enable_in_fastbuild,
poison_value_stores,
fuzzer,
layering_check,
module_maps,
Expand Down
6 changes: 3 additions & 3 deletions bazel/check_deps/check_non_test_cc_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@
# exist. Specially diagnose them to try to provide a more helpful
# message.
if repo in (
"@google_benchmark",
"@abseil-cpp",
"@googletest",
"@@google_benchmark+",
"@@abseil-cpp+",
"@@googletest+",
):
sys.exit("ERROR: dependency only allowed in test code: %s" % dep)

Expand Down
9 changes: 6 additions & 3 deletions testing/file_test/file_test_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,9 @@ static auto RunSingleTest(FileTestInfo& test, bool single_threaded,
if (absl::GetFlag(FLAGS_dump_output)) {
std::unique_lock<std::mutex> lock(output_mutex);
llvm::errs() << "\n--- Dumping: " << test.test_name << "\n\n";
} else if (absl::GetFlag(FLAGS_threads) == 1) {
std::unique_lock<std::mutex> lock(output_mutex);
llvm::errs() << "\nTEST " << test.test_name << " ";
}

// Load expected output.
Expand Down Expand Up @@ -544,12 +547,12 @@ static auto Main(int argc, char** argv) -> ErrorOr<int> {

Carbon::InitLLVM init_llvm(argc, argv);
testing::InitGoogleTest(&argc, argv);
auto args = absl::ParseCommandLine(argc, argv);
auto positional_args = absl::ParseCommandLine(argc, argv);

if (args.size() > 1) {
if (positional_args.size() > 1) {
ErrorBuilder b;
b << "Unexpected arguments:";
for (char* arg : llvm::ArrayRef(args).drop_front()) {
for (char* arg : llvm::ArrayRef(positional_args).drop_front()) {
b << " " << FormatEscaped(arg);
}
return b;
Expand Down
5 changes: 5 additions & 0 deletions testing/file_test/file_test_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ class FileTestBase {
return {};
}

// Returns additional arguments to add to the Run call, if any.
virtual auto GetAdditionalArgs() const -> llvm::SmallVector<std::string> {
return {};
}

// Returns a regex to match the default file when a line may not be present.
// May return nullptr if unused. If GetLineNumberReplacements returns an entry
// with has_file=false, this is required.
Expand Down
12 changes: 10 additions & 2 deletions testing/file_test/run_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,24 @@ auto RunTestFile(const FileTestBase& test_base, bool dump_output,
}
}

// The subclass can inject additional arguments unconditionally.
auto additional_args = test_base.GetAdditionalArgs();

// Convert the arguments to StringRef and const char* to match the
// expectations of PrettyStackTraceProgram and Run.
llvm::SmallVector<llvm::StringRef> test_args_ref;
llvm::SmallVector<const char*> test_argv_for_stack_trace;
test_args_ref.reserve(test_file.test_args.size());
test_argv_for_stack_trace.reserve(test_file.test_args.size() + 1);
test_args_ref.reserve(test_file.test_args.size() + additional_args.size());
test_argv_for_stack_trace.reserve(test_file.test_args.size() +
additional_args.size() + 1);
for (const auto& arg : test_file.test_args) {
test_args_ref.push_back(arg);
test_argv_for_stack_trace.push_back(arg.c_str());
}
for (const auto& arg : additional_args) {
test_args_ref.push_back(arg);
test_argv_for_stack_trace.push_back(arg.c_str());
}
// Add a trailing null so that this is a proper argv.
test_argv_for_stack_trace.push_back(nullptr);

Expand Down
1 change: 1 addition & 0 deletions toolchain/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ cc_library(

cc_library(
name = "value_store",
srcs = ["value_store.cpp"],
hdrs = ["value_store.h"],
deps = [
":mem_usage",
Expand Down
106 changes: 106 additions & 0 deletions toolchain/base/value_store.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "toolchain/base/value_store.h"

#include "common/ostream.h"

#ifdef LLVM_ADDRESS_SANITIZER_BUILD

namespace Carbon {

namespace Internal {

// Gets the global (thread-safe) decision to print poison logs.
//
// Initially this is called on each thread (with the same `val`) when deciding
// to print logs or not, and the first such thread to win which sets the static
// global bool. Further calls (which do not need to pass a value) just query and
// get back that `Condition`.
static auto GetShouldPrint(std::optional<bool> val = std::nullopt) -> bool {
static auto should_print = *val;
return should_print;
}

struct Condition {
std::string label;
uint64_t counter;
};

static auto ParseAbortCondition(llvm::StringRef condition) -> Condition {
if (condition.empty()) {
return {.counter = static_cast<uint64_t>(-1)};
}

auto [label, counter_str] = condition.split(':');
if (counter_str.empty()) {
llvm::errs()
<< "ERROR: --poison_abort condition should be 'label:counter'\n";
std::exit(1);
}
uint64_t counter;
// Note: getAsInteger returns false on success @_@.
if (counter_str.getAsInteger(0, counter)) {
llvm::errs() << "ERROR: counter could not be parsed as an integer\n";
llvm::errs()
<< "ERROR: --poison_abort condition should be 'label:counter'\n";
llvm::errs() << " NOTE: found label:'" << label << "' counter:'"
<< counter_str << "'\n";
std::exit(1);
}
return {.label = std::string(label), .counter = counter};
}

// Gets the global (thread-safe) abort Condition.
//
// Initially this is called on each thread (with the same `condition_str`) when
// setting the abort condition, and the first such thread to win which sets the
// static global `Condition`. Further calls without a string input just query
// and get back that `Condition`.
static auto GetAbortCondition(llvm::StringRef condition_str = "") -> Condition {
static Condition condition = ParseAbortCondition(condition_str);
return condition;
}

auto LogPoison(llvm::StringRef label, int element) -> void {
if (!GetShouldPrint()) {
return;
}
static uint64_t counter = 0;
if (element < 0) {
llvm::errs() << "++ " << label << " PoisonAll (" << label << ":" << counter
<< ")\n";
} else {
llvm::errs() << "++ " << label << " PoisonElement " << element << " ("
<< label << ":" << counter << ")\n";
}
auto condition = GetAbortCondition();
if (counter >= condition.counter && label == condition.label) {
llvm::errs() << "*** Aborting on poison event. Stack trace below.\n";
std::abort();
}
++counter;
}

auto LogUnpoison(llvm::StringRef label, int element) -> void {
if (!GetShouldPrint()) {
return;
}
if (element < 0) {
llvm::errs() << "-- " << label << " UnpoisonAll\n";
} else {
llvm::errs() << "-- " << label << " UnpoisonElement " << element << '\n';
}
}

} // namespace Internal

auto SetPoisonVerbose(bool v) -> void { Internal::GetShouldPrint(v); }
auto SetPoisonAbortCondition(llvm::StringRef s) -> void {
Internal::GetAbortCondition(s);
}

} // namespace Carbon

#endif
56 changes: 33 additions & 23 deletions toolchain/base/value_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,27 @@
namespace Carbon {

namespace Internal {

// Used as a parent class for non-printable types. This is just for
// std::conditional, not as an API.
class ValueStoreNotPrintable {};
} // namespace Internal

// Setup our compile time condition controlling poisoning of value stores. This
// is set to one by the Bazel flag `--features=poison_value_stores`.
//
// TODO: Eventually, this will always enabled when ASan is enabled, but we can't
// do that until we clean up all of the latent bugs.
#ifndef CARBON_POISON_VALUE_STORES
#define CARBON_POISON_VALUE_STORES 0
#elif !LLVM_ADDRESS_SANITIZER_BUILD
#error "CARBON_POISON_VALUE_STORES requires address sanitizer"
#ifdef LLVM_ADDRESS_SANITIZER_BUILD
namespace Internal {
// Logs the poison event to stderr if `SetPoisonVerbose()` was set to true. A
// negative `element` indicates the entire store was poisoned.
auto LogPoison(llvm::StringRef label, int element) -> void;
// Logs the unpoison event to stderr if `SetPoisonVerbose()` was set to true. A
// negative `element` indicates the entire store was unpoisoned.
auto LogUnpoison(llvm::StringRef label, int element) -> void;
} // namespace Internal

// Control whether to print verbose ASAN poisoning logs showing each poison
// event in the value stores. See --poison_verbose.
auto SetPoisonVerbose(bool v) -> void;
// Set the condition on which an ASAN poison event will abort the process and
// dump a stack trace. See --poison_abort.
auto SetPoisonAbortCondition(llvm::StringRef s) -> void;
#endif

// A simple wrapper for accumulating values, providing IDs to later retrieve the
Expand Down Expand Up @@ -66,7 +72,7 @@ class ValueStore
ValueStore() = default;
ValueStore(ValueStore&& other) noexcept
: values_((other.UnpoisonAll(), std::move(other.values_)))
#if CARBON_POISON_VALUE_STORES
#ifdef LLVM_ADDRESS_SANITIZER_BUILD
,
all_poisoned_(false)
#endif
Expand All @@ -77,7 +83,7 @@ class ValueStore
UnpoisonAll();
other.UnpoisonAll();
values_ = std::move(other.values_);
#if CARBON_POISON_VALUE_STORES
#ifdef LLVM_ADDRESS_SANITIZER_BUILD
all_poisoned_ = false;
#endif
PoisonAll();
Expand All @@ -98,15 +104,13 @@ class ValueStore
// Unpoison everything if the push will reallocate, in order to allow the
// vector to make a copy of the elements.
UnpoisonAll();
} else {
PoisonAll();
}

values_.push_back(std::move(value));

if (realloc) {
PoisonAll();
} else {
if (!PoisonAll()) {
// If we did not realloc, and the store was already poisoned, the
// PoisonAll did nothing. So we poison the new element.
PoisonElement(id.index);
}

Expand Down Expand Up @@ -183,32 +187,38 @@ class ValueStore
// Poison the entire contents of the value store. This is used to detect cases
// where references to elements in a value store are used across calls that
// might modify the store.
auto PoisonAll() const -> void {
#if CARBON_POISON_VALUE_STORES
auto PoisonAll() const -> bool {
#ifdef LLVM_ADDRESS_SANITIZER_BUILD
if (!all_poisoned_) {
Internal::LogPoison(IdT::Label, -1);
__asan_poison_memory_region(values_.data(),
values_.size() * sizeof(values_[0]));
all_poisoned_ = true;
return true;
}
#endif
return false;
}
// Unpoison the entire contents of the value store.
auto UnpoisonAll() const -> void {
#if CARBON_POISON_VALUE_STORES
#ifdef LLVM_ADDRESS_SANITIZER_BUILD
Internal::LogUnpoison(IdT::Label, -1);
__asan_unpoison_memory_region(values_.data(),
values_.size() * sizeof(values_[0]));
all_poisoned_ = false;
#endif
}
// Poison a single element.
auto PoisonElement([[maybe_unused]] int element) const -> void {
#if CARBON_POISON_VALUE_STORES
#ifdef LLVM_ADDRESS_SANITIZER_BUILD
Internal::LogPoison(IdT::Label, element);
__asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0]));
#endif
}
// Unpoison a single element.
auto UnpoisonElement([[maybe_unused]] int element) const -> void {
#if CARBON_POISON_VALUE_STORES
#ifdef LLVM_ADDRESS_SANITIZER_BUILD
Internal::LogUnpoison(IdT::Label, element);
__asan_unpoison_memory_region(values_.data() + element, sizeof(values_[0]));
all_poisoned_ = false;
#endif
Expand All @@ -218,7 +228,7 @@ class ValueStore
// stack, while this does make File smaller.
llvm::SmallVector<std::decay_t<ValueType>, 0> values_;

#if CARBON_POISON_VALUE_STORES
#ifdef LLVM_ADDRESS_SANITIZER_BUILD
// Whether the vector is currently fully poisoned.
//
// We use this to avoid repeated re-poisoning of the entire store. Doing so is
Expand Down
4 changes: 3 additions & 1 deletion toolchain/check/class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,9 @@ static auto CheckCompleteClassType(
}

if (class_info.is_dynamic) {
class_info.vtable_id = BuildVtable(
// This function imports, which can invalidate `class_info` and other
// pointers into value stores.
context.classes().Get(class_id).vtable_id = BuildVtable(
context, node_id,
defining_vptr ? SemIR::InstId::None : base_class_info->vtable_id,
vtable_contents);
Expand Down
5 changes: 4 additions & 1 deletion toolchain/check/handle_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,8 @@ auto HandleParseNode(Context& context, Parse::ImplDefinitionStartId node_id)
impl_decl_id, impl_info.scope_id,
context.generics().GetSelfSpecific(impl_info.generic_id));
StartGenericDefinition(context, impl_info.generic_id);
// This function imports, which can invalidate `impl_info` and other pointers
// into value stores.
ImplWitnessStartDefinition(context, impl_info);
context.inst_block_stack().Push();
context.node_stack().Push(node_id, impl_id);
Expand All @@ -579,7 +581,8 @@ auto HandleParseNode(Context& context, Parse::ImplDefinitionStartId node_id)
//
// We may need to track a list of instruction blocks here, as we do for a
// function.
impl_info.body_block_id = context.inst_block_stack().PeekOrAdd();
context.impls().Get(impl_id).body_block_id =
context.inst_block_stack().PeekOrAdd();
return true;
}

Expand Down
Loading
Loading