Unverified commit 4c8401f0, authored by Pete Stevenson, committed by Copybara
Browse files

perf_profile_connector.h|cc: make the table push period a run time knob, for...

perf_profile_connector.h|cc: make the table push period a run time knob, for https://github.com/pixie-io/pixie/issues/380.

Summary: perf_profile_connector.h|cc: make the table push period a run time knob, for https://github.com/pixie-io/pixie/issues/380.

Test Plan: Existing tests.

Reviewers: #stirling, oazizi

Reviewed By: #stirling, oazizi

Subscribers: oazizi
Signed-off-by: Pete Stevenson <jps@pixielabs.ai>

Differential Revision: https://phab.corp.pixielabs.ai/D10471

GitOrigin-RevId: 09140eac539f63e023c031a1049cf7cae1340f18
parent 2afd41fa
Showing with 83 additions and 50 deletions
+83 -50
......@@ -32,32 +32,62 @@ BPF_SRC_STRVIEW(profiler_bcc_script, profiler);
DEFINE_string(stirling_profiler_symbolizer, "bcc",
"Choice of which symbolizer to use. Options: bcc, elf");
DEFINE_bool(stirling_profiler_cache_symbols, true, "Whether to cache symbols");
DEFINE_uint32(stirling_perf_profiler_stats_logging_ratio,
std::chrono::minutes(10) / px::stirling::PerfProfileConnector::kSamplingPeriod,
"Sets the frequency of printing perf profiler stats.");
DEFINE_uint32(stirling_profiler_log_period_minutes, 10,
"Number of minutes between profiler stats log printouts.");
DEFINE_uint32(stirling_profiler_table_update_period_seconds, 30,
"Number of seconds between profiler table updates.");
DEFINE_uint32(stirling_profiler_stack_trace_sample_period_ms, 11,
"Number of milliseconds between stack trace samples.");
namespace px {
namespace stirling {
// PerfProfileConnector: collects stack trace samples via BPF and aggregates
// them into the stack traces table. All timing parameters are derived from
// run-time flags:
// - stack_trace_sampling_period_: interval between BPF stack trace samples.
// - sampling_period_: interval between table transfers (BPF map read-outs).
// - push_period_: half the sampling period, so each freshly collected sample
//   is pushed to the table as soon as it becomes available (a UX decision).
// - stats_log_interval_: number of transfer iterations between stats log
//   printouts (log period divided by the transfer period).
PerfProfileConnector::PerfProfileConnector(std::string_view source_name)
    : SourceConnector(source_name, kTables),
      stack_trace_sampling_period_(
          std::chrono::milliseconds{FLAGS_stirling_profiler_stack_trace_sample_period_ms}),
      // std::chrono::seconds converts implicitly (and exactly) to the
      // milliseconds member; avoids a potential uint32 overflow in
      // "1000 * FLAGS_...".
      sampling_period_(std::chrono::seconds{FLAGS_stirling_profiler_table_update_period_seconds}),
      push_period_(sampling_period_ / 2),
      stats_log_interval_(std::chrono::minutes(FLAGS_stirling_profiler_log_period_minutes) /
                          sampling_period_) {
  // The BPF maps are sized for at most 30 seconds worth of samples between
  // read-outs; a larger table update period would overflow them.
  constexpr auto kMaxSamplingPeriod = std::chrono::milliseconds{30000};
  DCHECK(sampling_period_ <= kMaxSamplingPeriod) << "Sampling period set too high.";
  // A table transfer period shorter than the BPF sample period would collect
  // (at most) one sample per transfer; presumably a misconfiguration.
  DCHECK(sampling_period_ >= stack_trace_sampling_period_);
}
Status PerfProfileConnector::InitImpl() {
sampling_freq_mgr_.set_period(kSamplingPeriod);
push_freq_mgr_.set_period(kPushPeriod);
sampling_freq_mgr_.set_period(sampling_period_);
push_freq_mgr_.set_period(push_period_);
const size_t ncpus = get_nprocs_conf();
VLOG(1) << "PerfProfiler: get_nprocs_conf(): " << ncpus;
// TODO(jps): Move this out into section of code where we log kernel version, etc.
LOG(INFO) << "PerfProfiler: get_nprocs_conf(): " << ncpus;
const std::vector<std::string> defines = {
absl::Substitute("-DNCPUS=$0", ncpus),
absl::Substitute("-DTRANSFER_PERIOD=$0", kSamplingPeriod.count()),
absl::Substitute("-DSAMPLE_PERIOD=$0", kBPFSamplingPeriod.count())};
absl::Substitute("-DTRANSFER_PERIOD=$0", sampling_period_.count()),
absl::Substitute("-DSAMPLE_PERIOD=$0", stack_trace_sampling_period_.count())};
// Compute the perf buffer size.
const int32_t expected_stack_traces_per_cpu =
IntRoundUpDivide(sampling_period_.count(), stack_trace_sampling_period_.count());
const int32_t expected_stack_races = ncpus * expected_stack_traces_per_cpu;
const int32_t overprovision_factor = 4;
const int32_t num_perf_buffer_entries = overprovision_factor * expected_stack_races;
const uint64_t probe_sample_period_ms = stack_trace_sampling_period_.count();
const auto probe_specs =
MakeArray<bpf_tools::SamplingProbeSpec>({"sample_call_stack", probe_sample_period_ms});
const auto perf_buffer_specs = MakeArray<bpf_tools::PerfBufferSpec>(
{{"histogram_a", HandleHistoEvent, HandleHistoLoss, num_perf_buffer_entries},
{"histogram_b", HandleHistoEvent, HandleHistoLoss, num_perf_buffer_entries}});
PL_RETURN_IF_ERROR(InitBPFProgram(profiler_bcc_script, defines));
PL_RETURN_IF_ERROR(AttachSamplingProbes(kProbeSpecs));
PL_RETURN_IF_ERROR(OpenPerfBuffers(kPerfBufferSpecs, this));
PL_RETURN_IF_ERROR(AttachSamplingProbes(probe_specs));
PL_RETURN_IF_ERROR(OpenPerfBuffers(perf_buffer_specs, this));
stack_traces_a_ = std::make_unique<ebpf::BPFStackTable>(GetStackTable("stack_traces_a"));
stack_traces_b_ = std::make_unique<ebpf::BPFStackTable>(GetStackTable("stack_traces_b"));
......@@ -229,7 +259,7 @@ void PerfProfileConnector::CreateRecords(ebpf::BPFStackTable* stack_traces, Conn
StackTraceHisto stack_trace_histogram = AggregateStackTraces(ctx, stack_traces);
constexpr auto age_tick_period = std::chrono::minutes(5);
if (sampling_freq_mgr_.count() % (age_tick_period / kSamplingPeriod) == 0) {
if (sampling_freq_mgr_.count() % (age_tick_period / sampling_period_) == 0) {
stack_trace_ids_.AgeTick();
}
......@@ -287,7 +317,7 @@ void PerfProfileConnector::TransferDataImpl(ConnectorContext* ctx,
stats_.Increment(StatKey::kBPFMapSwitchoverEvent, 1);
if (sampling_freq_mgr_.count() % FLAGS_stirling_perf_profiler_stats_logging_ratio == 0) {
if (sampling_freq_mgr_.count() % stats_log_interval_ == 0) {
VLOG(1) << "PerfProfileConnector statistics: " << stats_.Print();
}
}
......
......@@ -49,24 +49,29 @@ class PerfProfileConnector : public SourceConnector, public bpf_tools::BCCWrappe
static constexpr auto kTables = MakeArray(kStackTraceTable);
static constexpr uint32_t kPerfProfileTableNum = TableNum(kTables, kStackTraceTable);
// kBPFSamplingPeriod: the time interval in between stack trace samples.
static constexpr auto kBPFSamplingPeriod = std::chrono::milliseconds{11};
// Push period is set to 1/2 of the sample period such that we push each new
// sample when it becomes available. This is a UX decision so that the user
// gets fresh profiler data every 30 seconds (or worst case w/in 45 seconds).
static constexpr auto kSamplingPeriod = std::chrono::milliseconds{30000};
static constexpr auto kPushPeriod = std::chrono::milliseconds{15000};
// Factory for PerfProfileConnector. Returns the concrete connector type
// (rather than the SourceConnector base) so that callers, e.g. tests, can use
// profiler-specific accessors such as SamplingPeriod().
static std::unique_ptr<PerfProfileConnector> Create(std::string_view name) {
  return std::unique_ptr<PerfProfileConnector>(new PerfProfileConnector(name));
}
Status InitImpl() override;
Status StopImpl() override;
void TransferDataImpl(ConnectorContext* ctx, const std::vector<DataTable*>& data_tables) override;
// Returns the interval between table transfers (i.e. BPF map read-outs).
std::chrono::milliseconds SamplingPeriod() const { return sampling_period_; }
// Returns the interval between individual BPF stack trace samples.
std::chrono::milliseconds StackTraceSamplingPeriod() const {
return stack_trace_sampling_period_;
}
private:
// The time interval between stack trace samples, i.e. the sample rate used inside of BPF.
const std::chrono::milliseconds stack_trace_sampling_period_;
// Push period is set to 1/2 of the sample period such that we push each new
// sample when it becomes available. This is a UX decision so that the user
// gets fresh profiler data every 30 seconds (or worst case w/in 45 seconds).
const std::chrono::milliseconds sampling_period_;
const std::chrono::milliseconds push_period_;
// StackTraceHisto: SymbolicStackTrace => observation-count
using StackTraceHisto = absl::flat_hash_map<SymbolicStackTrace, uint64_t>;
......@@ -108,27 +113,12 @@ class PerfProfileConnector : public SourceConnector, public bpf_tools::BCCWrappe
// TODO(oazizi): Investigate ways of sharing across source_connectors.
ProcTracker proc_tracker_;
static constexpr auto kProbeSpecs =
MakeArray<bpf_tools::SamplingProbeSpec>({"sample_call_stack", kBPFSamplingPeriod.count()});
static const uint32_t kExpectedStackTracesPerCPU =
IntRoundUpDivide(kSamplingPeriod.count(), kBPFSamplingPeriod.count());
static const uint32_t kMaxNCPUs = 128;
static const uint32_t kExpectedStackTraces = kMaxNCPUs * kExpectedStackTracesPerCPU;
// Overprovision:
static const uint32_t kNumPerfBufferEntries = 4 * kExpectedStackTraces;
static void HandleHistoEvent(void* cb_cookie, void* data, int /*data_size*/);
static void HandleHistoLoss(void* cb_cookie, uint64_t lost);
// Called by HandleHistoEvent() to add the stack-trace-key to raw_histo_data_.
void AcceptStackTraceKey(stack_trace_key_t* data);
inline static const auto kPerfBufferSpecs = MakeArray<bpf_tools::PerfBufferSpec>(
{{"histogram_a", HandleHistoEvent, HandleHistoLoss, kNumPerfBufferEntries},
{"histogram_b", HandleHistoEvent, HandleHistoLoss, kNumPerfBufferEntries}});
ebpf::BPFPerfBuffer* histogram_a_perf_buffer_;
ebpf::BPFPerfBuffer* histogram_b_perf_buffer_;
......@@ -138,6 +128,7 @@ class PerfProfileConnector : public SourceConnector, public bpf_tools::BCCWrappe
kLossHistoEvent,
};
const uint32_t stats_log_interval_;
utils::StatCounter<StatKey> stats_;
};
......
......@@ -16,6 +16,8 @@
* SPDX-License-Identifier: Apache-2.0
*/
#include <algorithm>
#include <sys/sysinfo.h>
#include <absl/strings/substitute.h>
......@@ -28,6 +30,8 @@
#include "src/stirling/source_connectors/perf_profiler/stack_traces_table.h"
#include "src/stirling/testing/common.h"
DEFINE_uint32(test_run_time, 90, "Number of seconds to run the test.");
namespace px {
namespace stirling {
......@@ -56,12 +60,14 @@ class CPUPinnedBinaryRunner {
class PerfProfileBPFTest : public ::testing::Test {
public:
// Initializes the targeted test run time from the --test_run_time flag, and
// creates the output data table backed by the stack trace table schema.
PerfProfileBPFTest()
    : test_run_time_(FLAGS_test_run_time), data_table_(/*id*/ 0, kStackTraceTable) {}
protected:
void SetUp() override {
// Create and initialize the profiler source connector under test.
source_ = PerfProfileConnector::Create("perf_profile_connector");
ASSERT_OK(source_->Init());
// Sanity check: the test must run longer than one table transfer period,
// otherwise no data would ever be pushed for the test to observe.
ASSERT_LT(source_->SamplingPeriod(), test_run_time_);
}
// Stops the connector; a failure here indicates BPF detach/cleanup problems.
void TearDown() override { ASSERT_OK(source_->Stop()); }
......@@ -144,14 +150,18 @@ class PerfProfileBPFTest : public ::testing::Test {
void CheckExpectedStackTraceCounts(const ssize_t num_subprocesses,
const std::chrono::duration<double> elapsed_time,
const std::string& key1x, const std::string& key2x) {
const uint64_t kBPFSamplingPeriodMillis = PerfProfileConnector::kBPFSamplingPeriod.count();
const double expected_rate = 1000.0 / static_cast<double>(kBPFSamplingPeriodMillis);
const uint64_t table_period_ms = source_->SamplingPeriod().count();
const uint64_t bpf_period_ms = source_->StackTraceSamplingPeriod().count();
const double expected_rate = 1000.0 / static_cast<double>(bpf_period_ms);
const double expected_num_samples = num_subprocesses * elapsed_time.count() * expected_rate;
const uint64_t expected_num_sample_lower = uint64_t(0.9 * expected_num_samples);
const uint64_t expected_num_sample_upper = uint64_t(1.1 * expected_num_samples);
const double observedNumSamples = static_cast<double>(cumulative_sum_);
const double observed_rate = observedNumSamples / elapsed_time.count() / num_subprocesses;
LOG(INFO) << absl::StrFormat("Table sampling period: %d [ms]", table_period_ms);
LOG(INFO) << absl::StrFormat("BPF sampling period: %d [ms]", bpf_period_ms);
LOG(INFO) << absl::StrFormat("Number of processes: %d", num_subprocesses);
LOG(INFO) << absl::StrFormat("expected num samples: %d", uint64_t(expected_num_samples));
LOG(INFO) << absl::StrFormat("total samples: %d", cumulative_sum_);
LOG(INFO) << absl::StrFormat("elapsed time: %.1f [sec]", elapsed_time.count());
......@@ -209,10 +219,10 @@ class PerfProfileBPFTest : public ::testing::Test {
column_ptrs_populated_ = true;
}
std::chrono::duration<double> RunTest(const std::chrono::seconds test_run_time) {
constexpr std::chrono::milliseconds t_sleep = PerfProfileConnector::kSamplingPeriod;
std::chrono::duration<double> RunTest() {
const std::chrono::milliseconds t_sleep = source_->SamplingPeriod();
const auto start_time = std::chrono::steady_clock::now();
const auto stop_time = start_time + test_run_time;
const auto stop_time = start_time + test_run_time_;
// Continuously poke Stirling TransferData() using the underlying schema periodicity;
// break from this loop when the elapsed time exceeds the targeted run time.
......@@ -227,7 +237,8 @@ class PerfProfileBPFTest : public ::testing::Test {
return std::chrono::steady_clock::now() - start_time;
}
std::unique_ptr<SourceConnector> source_;
const std::chrono::seconds test_run_time_;
std::unique_ptr<PerfProfileConnector> source_;
std::unique_ptr<StandaloneContext> ctx_;
DataTable data_table_;
const std::vector<DataTable*> data_tables_{&data_table_};
......@@ -267,7 +278,7 @@ TEST_F(PerfProfileBPFTest, PerfProfilerGoTest) {
// finds the upids that belong to the sub-processes that we have just created.
ctx_ = std::make_unique<StandaloneContext>();
const std::chrono::duration<double> elapsed_time = RunTest(std::chrono::seconds(120));
const std::chrono::duration<double> elapsed_time = RunTest();
// Pull the data into this test (as columns_) using ConsumeRecords(), and
// find the row indices that belong to our sub-processes using GetTargetRowIdxs().
......@@ -305,7 +316,7 @@ TEST_F(PerfProfileBPFTest, PerfProfilerCppTest) {
// finds the upids that belong to the sub-processes that we have just created.
ctx_ = std::make_unique<StandaloneContext>();
const std::chrono::duration<double> elapsed_time = RunTest(std::chrono::seconds(120));
const std::chrono::duration<double> elapsed_time = RunTest();
// Pull the data into this test (as columns_) using ConsumeRecords(), and
// find the row indices that belong to our sub-processes using GetTargetRowIdxs().
......@@ -338,7 +349,8 @@ TEST_F(PerfProfileBPFTest, TestOutOfContext) {
// Start they toy apps as sub-processes, then,
// for a certain amount of time, collect data using RunTest().
auto sub_processes = StartSubProcesses<CPUPinnedBinaryRunner>(bazel_app_path, kTestIdx);
RunTest(std::chrono::seconds(30));
RunTest();
// Pull the data into this test (as columns_) using ConsumeRecords(), and
// find the row indices that belong to our sub-processes using GetTargetRowIdxs().
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.