// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file implements parsers to convert legacy profiles into the
// profile.proto format.

package profile

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"math"
	"regexp"
	"strconv"
	"strings"
)

var (
	// Header and sample lines of a Go count profile. Both patterns are
	// anchored and require the trailing newline.
	countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\n\z`)
	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`)

	// Header and sample lines of a text heap profile.
	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)

	// Sample line of a contention profile: two numbers followed by a stack.
	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)

	// A single lower-case hexadecimal literal with a 0x prefix.
	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)

	// Headers of the growthz and fragmentationz variants of the heap format.
	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`)

	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`)

	// Section and per-thread headers of a threadz profile.
	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)

	// Memory map entries: the /proc/<pid>/maps layout and a briefer
	// "start-limit: file" layout.
	procMapsRE  = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)
	briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)

	// LegacyHeapAllocated instructs the heapz parsers to use the
	// allocated memory stats instead of the default in-use memory. Note
	// that tcmalloc doesn't provide all allocated memory, only in-use
	// stats.
	LegacyHeapAllocated bool
)

// isSpaceOrComment reports whether line is empty once surrounding
// whitespace is removed, or whether its first non-space character is '#'.
func isSpaceOrComment(line string) bool {
	trimmed := strings.TrimSpace(line)
	return len(trimmed) == 0 || trimmed[0] == '#'
}

// parseGoCount parses a Go count profile (e.g., threadcreate or
// goroutine) and returns a new Profile.
func parseGoCount(b []byte) (*Profile, error) { r := bytes.NewBuffer(b) var line string var err error for { // Skip past comments and empty lines seeking a real header. line, err = r.ReadString('\n') if err != nil { return nil, err } if !isSpaceOrComment(line) { break } } m := countStartRE.FindStringSubmatch(line) if m == nil { return nil, errUnrecognized } profileType := m[1] p := &Profile{ PeriodType: &ValueType{Type: profileType, Unit: "count"}, Period: 1, SampleType: []*ValueType{{Type: profileType, Unit: "count"}}, } locations := make(map[uint64]*Location) for { line, err = r.ReadString('\n') if err != nil { if err == io.EOF { break } return nil, err } if isSpaceOrComment(line) { continue } if strings.HasPrefix(line, "---") { break } m := countRE.FindStringSubmatch(line) if m == nil { return nil, errMalformed } n, err := strconv.ParseInt(m[1], 0, 64) if err != nil { return nil, errMalformed } fields := strings.Fields(m[2]) locs := make([]*Location, 0, len(fields)) for _, stk := range fields { addr, err := strconv.ParseUint(stk, 0, 64) if err != nil { return nil, errMalformed } // Adjust all frames by -1 to land on the call instruction. addr-- loc := locations[addr] if loc == nil { loc = &Location{ Address: addr, } locations[addr] = loc p.Location = append(p.Location, loc) } locs = append(locs, loc) } p.Sample = append(p.Sample, &Sample{ Location: locs, Value: []int64{n}, }) } if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil { return nil, err } return p, nil } // remapLocationIDs ensures there is a location for each address // referenced by a sample, and remaps the samples to point to the new // location ids. 
func (p *Profile) remapLocationIDs() { seen := make(map[*Location]bool, len(p.Location)) var locs []*Location for _, s := range p.Sample { for _, l := range s.Location { if seen[l] { continue } l.ID = uint64(len(locs) + 1) locs = append(locs, l) seen[l] = true } } p.Location = locs } func (p *Profile) remapFunctionIDs() { seen := make(map[*Function]bool, len(p.Function)) var fns []*Function for _, l := range p.Location { for _, ln := range l.Line { fn := ln.Function if fn == nil || seen[fn] { continue } fn.ID = uint64(len(fns) + 1) fns = append(fns, fn) seen[fn] = true } } p.Function = fns } // remapMappingIDs matches location addresses with existing mappings // and updates them appropriately. This is O(N*M), if this ever shows // up as a bottleneck, evaluate sorting the mappings and doing a // binary search, which would make it O(N*log(M)). func (p *Profile) remapMappingIDs() { if len(p.Mapping) == 0 { return } // Some profile handlers will incorrectly set regions for the main // executable if its section is remapped. Fix them through heuristics. // Remove the initial mapping if named '/anon_hugepage' and has a // consecutive adjacent mapping. if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") { if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start { p.Mapping = p.Mapping[1:] } } // Subtract the offset from the start of the main mapping if it // ends up at a recognizable start address. const expectedStart = 0x400000 if m := p.Mapping[0]; m.Start-m.Offset == expectedStart { m.Start = expectedStart m.Offset = 0 } for _, l := range p.Location { if a := l.Address; a != 0 { for _, m := range p.Mapping { if m.Start <= a && a < m.Limit { l.Mapping = m break } } } } // Reset all mapping IDs. 
for i, m := range p.Mapping { m.ID = uint64(i + 1) } } var cpuInts = []func([]byte) (uint64, []byte){ get32l, get32b, get64l, get64b, } func get32l(b []byte) (uint64, []byte) { if len(b) < 4 { return 0, nil } return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:] } func get32b(b []byte) (uint64, []byte) { if len(b) < 4 { return 0, nil } return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:] } func get64l(b []byte) (uint64, []byte) { if len(b) < 8 { return 0, nil } return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:] } func get64b(b []byte) (uint64, []byte) { if len(b) < 8 { return 0, nil } return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:] } // ParseTracebacks parses a set of tracebacks and returns a newly // populated profile. It will accept any text file and generate a // Profile out of it with any hex addresses it can identify, including // a process map if it can recognize one. Each sample will include a // tag "source" with the addresses recognized in string format. func ParseTracebacks(b []byte) (*Profile, error) { r := bytes.NewBuffer(b) p := &Profile{ PeriodType: &ValueType{Type: "trace", Unit: "count"}, Period: 1, SampleType: []*ValueType{ {Type: "trace", Unit: "count"}, }, } var sources []string var sloc []*Location locs := make(map[uint64]*Location) for { l, err := r.ReadString('\n') if err != nil { if err != io.EOF { return nil, err } if l == "" { break } } if sectionTrigger(l) == memoryMapSection { break } if s, addrs := extractHexAddresses(l); len(s) > 0 { for _, addr := range addrs { // Addresses from stack traces point to the next instruction after // each call. Adjust by -1 to land somewhere on the actual call. 
addr-- loc := locs[addr] if locs[addr] == nil { loc = &Location{ Address: addr, } p.Location = append(p.Location, loc) locs[addr] = loc } sloc = append(sloc, loc) } sources = append(sources, s...) } else { if len(sources) > 0 || len(sloc) > 0 { addTracebackSample(sloc, sources, p) sloc, sources = nil, nil } } } // Add final sample to save any leftover data. if len(sources) > 0 || len(sloc) > 0 { addTracebackSample(sloc, sources, p) } if err := p.ParseMemoryMap(r); err != nil { return nil, err } return p, nil } func addTracebackSample(l []*Location, s []string, p *Profile) { p.Sample = append(p.Sample, &Sample{ Value: []int64{1}, Location: l, Label: map[string][]string{"source": s}, }) } // parseCPU parses a profilez legacy profile and returns a newly // populated Profile. // // The general format for profilez samples is a sequence of words in // binary format. The first words are a header with the following data: // 1st word -- 0 // 2nd word -- 3 // 3rd word -- 0 if a c++ application, 1 if a java application. // 4th word -- Sampling period (in microseconds). // 5th word -- Padding. func parseCPU(b []byte) (*Profile, error) { var parse func([]byte) (uint64, []byte) var n1, n2, n3, n4, n5 uint64 for _, parse = range cpuInts { var tmp []byte n1, tmp = parse(b) n2, tmp = parse(tmp) n3, tmp = parse(tmp) n4, tmp = parse(tmp) n5, tmp = parse(tmp) if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 { b = tmp return cpuProfile(b, int64(n4), parse) } } return nil, errUnrecognized } // cpuProfile returns a new Profile from C++ profilez data. // b is the profile bytes after the header, period is the profiling // period, and parse is a function to parse 8-byte chunks from the // profile in its native endianness. 
func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) { p := &Profile{ Period: period * 1000, PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"}, SampleType: []*ValueType{ {Type: "samples", Unit: "count"}, {Type: "cpu", Unit: "nanoseconds"}, }, } var err error if b, _, err = parseCPUSamples(b, parse, true, p); err != nil { return nil, err } // If all samples have the same second-to-the-bottom frame, it // strongly suggests that it is an uninteresting artifact of // measurement -- a stack frame pushed by the signal handler. The // bottom frame is always correct as it is picked up from the signal // structure, not the stack. Check if this is the case and if so, // remove. if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 { allSame := true id1 := p.Sample[0].Location[1].Address for _, s := range p.Sample { if len(s.Location) < 2 || id1 != s.Location[1].Address { allSame = false break } } if allSame { for _, s := range p.Sample { s.Location = append(s.Location[:1], s.Location[2:]...) } } } if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil { return nil, err } return p, nil } // parseCPUSamples parses a collection of profilez samples from a // profile. // // profilez samples are a repeated sequence of stack frames of the // form: // 1st word -- The number of times this stack was encountered. // 2nd word -- The size of the stack (StackSize). // 3rd word -- The first address on the stack. // ... // StackSize + 2 -- The last address on the stack // The last stack trace is of the form: // 1st word -- 0 // 2nd word -- 1 // 3rd word -- 0 // // Addresses from stack traces may point to the next instruction after // each call. Optionally adjust by -1 to land somewhere on the actual // call (except for the leaf, which is not a call). 
func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) { locs := make(map[uint64]*Location) for len(b) > 0 { var count, nstk uint64 count, b = parse(b) nstk, b = parse(b) if b == nil || nstk > uint64(len(b)/4) { return nil, nil, errUnrecognized } var sloc []*Location addrs := make([]uint64, nstk) for i := 0; i < int(nstk); i++ { addrs[i], b = parse(b) } if count == 0 && nstk == 1 && addrs[0] == 0 { // End of data marker break } for i, addr := range addrs { if adjust && i > 0 { addr-- } loc := locs[addr] if loc == nil { loc = &Location{ Address: addr, } locs[addr] = loc p.Location = append(p.Location, loc) } sloc = append(sloc, loc) } p.Sample = append(p.Sample, &Sample{ Value: []int64{int64(count), int64(count) * p.Period}, Location: sloc, }) } // Reached the end without finding the EOD marker. return b, locs, nil } // parseHeap parses a heapz legacy or a growthz profile and // returns a newly populated Profile. 
func parseHeap(b []byte) (p *Profile, err error) { r := bytes.NewBuffer(b) l, err := r.ReadString('\n') if err != nil { return nil, errUnrecognized } sampling := "" if header := heapHeaderRE.FindStringSubmatch(l); header != nil { p = &Profile{ SampleType: []*ValueType{ {Type: "objects", Unit: "count"}, {Type: "space", Unit: "bytes"}, }, PeriodType: &ValueType{Type: "objects", Unit: "bytes"}, } var period int64 if len(header[6]) > 0 { if period, err = strconv.ParseInt(header[6], 10, 64); err != nil { return nil, errUnrecognized } } switch header[5] { case "heapz_v2", "heap_v2": sampling, p.Period = "v2", period case "heapprofile": sampling, p.Period = "", 1 case "heap": sampling, p.Period = "v2", period/2 default: return nil, errUnrecognized } } else if header = growthHeaderRE.FindStringSubmatch(l); header != nil { p = &Profile{ SampleType: []*ValueType{ {Type: "objects", Unit: "count"}, {Type: "space", Unit: "bytes"}, }, PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"}, Period: 1, } } else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil { p = &Profile{ SampleType: []*ValueType{ {Type: "objects", Unit: "count"}, {Type: "space", Unit: "bytes"}, }, PeriodType: &ValueType{Type: "allocations", Unit: "count"}, Period: 1, } } else { return nil, errUnrecognized } if LegacyHeapAllocated { for _, st := range p.SampleType { st.Type = "alloc_" + st.Type } } else { for _, st := range p.SampleType { st.Type = "inuse_" + st.Type } } locs := make(map[uint64]*Location) for { l, err = r.ReadString('\n') if err != nil { if err != io.EOF { return nil, err } if l == "" { break } } if isSpaceOrComment(l) { continue } l = strings.TrimSpace(l) if sectionTrigger(l) != unrecognizedSection { break } value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling) if err != nil { return nil, err } var sloc []*Location for _, addr := range addrs { // Addresses from stack traces point to the next instruction after // each call. 
Adjust by -1 to land somewhere on the actual call. addr-- loc := locs[addr] if locs[addr] == nil { loc = &Location{ Address: addr, } p.Location = append(p.Location, loc) locs[addr] = loc } sloc = append(sloc, loc) } p.Sample = append(p.Sample, &Sample{ Value: value, Location: sloc, NumLabel: map[string][]int64{"bytes": {blocksize}}, }) } if err = parseAdditionalSections(l, r, p); err != nil { return nil, err } return p, nil } // parseHeapSample parses a single row from a heap profile into a new Sample. func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) { sampleData := heapSampleRE.FindStringSubmatch(line) if len(sampleData) != 6 { return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData)) } // Use first two values by default; tcmalloc sampling generates the // same value for both, only the older heap-profile collect separate // stats for in-use and allocated objects. valueIndex := 1 if LegacyHeapAllocated { valueIndex = 3 } var v1, v2 int64 if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil { return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) } if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil { return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) } if v1 == 0 { if v2 != 0 { return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2) } } else { blocksize = v2 / v1 if sampling == "v2" { v1, v2 = scaleHeapSample(v1, v2, rate) } } value = []int64{v1, v2} addrs = parseHexAddresses(sampleData[5]) return value, blocksize, addrs, nil } // extractHexAddresses extracts hex numbers from a string and returns // them, together with their numeric value, in a slice. 
func extractHexAddresses(s string) ([]string, []uint64) { hexStrings := hexNumberRE.FindAllString(s, -1) var ids []uint64 for _, s := range hexStrings { if id, err := strconv.ParseUint(s, 0, 64); err == nil { ids = append(ids, id) } else { // Do not expect any parsing failures due to the regexp matching. panic("failed to parse hex value:" + s) } } return hexStrings, ids } // parseHexAddresses parses hex numbers from a string and returns them // in a slice. func parseHexAddresses(s string) []uint64 { _, ids := extractHexAddresses(s) return ids } // scaleHeapSample adjusts the data from a heapz Sample to // account for its probability of appearing in the collected // data. heapz profiles are a sampling of the memory allocations // requests in a program. We estimate the unsampled value by dividing // each collected sample by its probability of appearing in the // profile. heapz v2 profiles rely on a poisson process to determine // which samples to collect, based on the desired average collection // rate R. The probability of a sample of size S to appear in that // profile is 1-exp(-S/R). func scaleHeapSample(count, size, rate int64) (int64, int64) { if count == 0 || size == 0 { return 0, 0 } if rate <= 1 { // if rate==1 all samples were collected so no adjustment is needed. // if rate<1 treat as unknown and skip scaling. return count, size } avgSize := float64(size) / float64(count) scale := 1 / (1 - math.Exp(-avgSize/float64(rate))) return int64(float64(count) * scale), int64(float64(size) * scale) } // parseContention parses a mutex or contention profile. There are 2 cases: // "--- contentionz " for legacy C++ profiles (and backwards compatibility) // "--- mutex:" or "--- contention:" for profiles generated by the Go runtime. // This code converts the text output from runtime into a *Profile. (In the future // the runtime might write a serialized Profile directly making this unnecessary.) 
func parseContention(b []byte) (*Profile, error) { r := bytes.NewBuffer(b) var l string var err error for { // Skip past comments and empty lines seeking a real header. l, err = r.ReadString('\n') if err != nil { return nil, err } if !isSpaceOrComment(l) { break } } if strings.HasPrefix(l, "--- contentionz ") { return parseCppContention(r) } else if strings.HasPrefix(l, "--- mutex:") { return parseCppContention(r) } else if strings.HasPrefix(l, "--- contention:") { return parseCppContention(r) } return nil, errUnrecognized } // parseCppContention parses the output from synchronization_profiling.cc // for backward compatibility, and the compatible (non-debug) block profile // output from the Go runtime. func parseCppContention(r *bytes.Buffer) (*Profile, error) { p := &Profile{ PeriodType: &ValueType{Type: "contentions", Unit: "count"}, Period: 1, SampleType: []*ValueType{ {Type: "contentions", Unit: "count"}, {Type: "delay", Unit: "nanoseconds"}, }, } var cpuHz int64 var l string var err error // Parse text of the form "attribute = value" before the samples. const delimiter = "=" for { l, err = r.ReadString('\n') if err != nil { if err != io.EOF { return nil, err } if l == "" { break } } if isSpaceOrComment(l) { continue } if l = strings.TrimSpace(l); l == "" { continue } if strings.HasPrefix(l, "---") { break } attr := strings.SplitN(l, delimiter, 2) if len(attr) != 2 { break } key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]) var err error switch key { case "cycles/second": if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil { return nil, errUnrecognized } case "sampling period": if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil { return nil, errUnrecognized } case "ms since reset": ms, err := strconv.ParseInt(val, 0, 64) if err != nil { return nil, errUnrecognized } p.DurationNanos = ms * 1000 * 1000 case "format": // CPP contentionz profiles don't have format. 
return nil, errUnrecognized case "resolution": // CPP contentionz profiles don't have resolution. return nil, errUnrecognized case "discarded samples": default: return nil, errUnrecognized } } locs := make(map[uint64]*Location) for { if !isSpaceOrComment(l) { if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") { break } value, addrs, err := parseContentionSample(l, p.Period, cpuHz) if err != nil { return nil, err } var sloc []*Location for _, addr := range addrs { // Addresses from stack traces point to the next instruction after // each call. Adjust by -1 to land somewhere on the actual call. addr-- loc := locs[addr] if locs[addr] == nil { loc = &Location{ Address: addr, } p.Location = append(p.Location, loc) locs[addr] = loc } sloc = append(sloc, loc) } p.Sample = append(p.Sample, &Sample{ Value: value, Location: sloc, }) } if l, err = r.ReadString('\n'); err != nil { if err != io.EOF { return nil, err } if l == "" { break } } } if err = parseAdditionalSections(l, r, p); err != nil { return nil, err } return p, nil } // parseContentionSample parses a single row from a contention profile // into a new Sample. func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) { sampleData := contentionSampleRE.FindStringSubmatch(line) if sampleData == nil { return value, addrs, errUnrecognized } v1, err := strconv.ParseInt(sampleData[1], 10, 64) if err != nil { return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) } v2, err := strconv.ParseInt(sampleData[2], 10, 64) if err != nil { return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err) } // Unsample values if period and cpuHz are available. // - Delays are scaled to cycles and then to nanoseconds. // - Contentions are scaled to cycles. 
if period > 0 { if cpuHz > 0 { cpuGHz := float64(cpuHz) / 1e9 v1 = int64(float64(v1) * float64(period) / cpuGHz) } v2 = v2 * period } value = []int64{v2, v1} addrs = parseHexAddresses(sampleData[3]) return value, addrs, nil } // parseThread parses a Threadz profile and returns a new Profile. func parseThread(b []byte) (*Profile, error) { r := bytes.NewBuffer(b) var line string var err error for { // Skip past comments and empty lines seeking a real header. line, err = r.ReadString('\n') if err != nil { return nil, err } if !isSpaceOrComment(line) { break } } if m := threadzStartRE.FindStringSubmatch(line); m != nil { // Advance over initial comments until first stack trace. for { line, err = r.ReadString('\n') if err != nil { if err != io.EOF { return nil, err } if line == "" { break } } if sectionTrigger(line) != unrecognizedSection || line[0] == '-' { break } } } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 { return nil, errUnrecognized } p := &Profile{ SampleType: []*ValueType{{Type: "thread", Unit: "count"}}, PeriodType: &ValueType{Type: "thread", Unit: "count"}, Period: 1, } locs := make(map[uint64]*Location) // Recognize each thread and populate profile samples. for sectionTrigger(line) == unrecognizedSection { if strings.HasPrefix(line, "---- no stack trace for") { line = "" break } if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 { return nil, errUnrecognized } var addrs []uint64 line, addrs, err = parseThreadSample(r) if err != nil { return nil, errUnrecognized } if len(addrs) == 0 { // We got a --same as previous threads--. Bump counters. if len(p.Sample) > 0 { s := p.Sample[len(p.Sample)-1] s.Value[0]++ } continue } var sloc []*Location for _, addr := range addrs { // Addresses from stack traces point to the next instruction after // each call. Adjust by -1 to land somewhere on the actual call. 
addr-- loc := locs[addr] if locs[addr] == nil { loc = &Location{ Address: addr, } p.Location = append(p.Location, loc) locs[addr] = loc } sloc = append(sloc, loc) } p.Sample = append(p.Sample, &Sample{ Value: []int64{1}, Location: sloc, }) } if err = parseAdditionalSections(line, r, p); err != nil { return nil, err } return p, nil } // parseThreadSample parses a symbolized or unsymbolized stack trace. // Returns the first line after the traceback, the sample (or nil if // it hits a 'same-as-previous' marker) and an error. func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) { var l string sameAsPrevious := false for { if l, err = b.ReadString('\n'); err != nil { if err != io.EOF { return "", nil, err } if l == "" { break } } if l = strings.TrimSpace(l); l == "" { continue } if strings.HasPrefix(l, "---") { break } if strings.Contains(l, "same as previous thread") { sameAsPrevious = true continue } addrs = append(addrs, parseHexAddresses(l)...) } if sameAsPrevious { return l, nil, nil } return l, addrs, nil } // parseAdditionalSections parses any additional sections in the // profile, ignoring any unrecognized sections. func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) { for { if sectionTrigger(l) == memoryMapSection { break } // Ignore any unrecognized sections. if l, err := b.ReadString('\n'); err != nil { if err != io.EOF { return err } if l == "" { break } } } return p.ParseMemoryMap(b) } // ParseMemoryMap parses a memory map in the format of // /proc/self/maps, and overrides the mappings in the current profile. // It renumbers the samples and locations in the profile correspondingly. 
func (p *Profile) ParseMemoryMap(rd io.Reader) error { b := bufio.NewReader(rd) var attrs []string var r *strings.Replacer const delimiter = "=" for { l, err := b.ReadString('\n') if err != nil { if err != io.EOF { return err } if l == "" { break } } if l = strings.TrimSpace(l); l == "" { continue } if r != nil { l = r.Replace(l) } m, err := parseMappingEntry(l) if err != nil { if err == errUnrecognized { // Recognize assignments of the form: attr=value, and replace // $attr with value on subsequent mappings. if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 { attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])) r = strings.NewReplacer(attrs...) } // Ignore any unrecognized entries continue } return err } if m == nil || (m.File == "" && len(p.Mapping) != 0) { // In some cases the first entry may include the address range // but not the name of the file. It should be followed by // another entry with the name. continue } if len(p.Mapping) == 1 && p.Mapping[0].File == "" { // Update the name if this is the entry following that empty one. p.Mapping[0].File = m.File continue } p.Mapping = append(p.Mapping, m) } p.remapLocationIDs() p.remapFunctionIDs() p.remapMappingIDs() return nil } func parseMappingEntry(l string) (*Mapping, error) { mapping := &Mapping{} var err error if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 { if !strings.Contains(me[3], "x") { // Skip non-executable entries. 
return nil, nil } if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil { return nil, errUnrecognized } if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil { return nil, errUnrecognized } if me[4] != "" { if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil { return nil, errUnrecognized } } mapping.File = me[8] return mapping, nil } if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 { if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil { return nil, errUnrecognized } if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil { return nil, errUnrecognized } mapping.File = me[3] if me[5] != "" { if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil { return nil, errUnrecognized } } return mapping, nil } return nil, errUnrecognized } type sectionType int const ( unrecognizedSection sectionType = iota memoryMapSection ) var memoryMapTriggers = []string{ "--- Memory map: ---", "MAPPED_LIBRARIES:", } func sectionTrigger(line string) sectionType { for _, trigger := range memoryMapTriggers { if strings.Contains(line, trigger) { return memoryMapSection } } return unrecognizedSection } func (p *Profile) addLegacyFrameInfo() { switch { case isProfileType(p, heapzSampleTypes) || isProfileType(p, heapzInUseSampleTypes) || isProfileType(p, heapzAllocSampleTypes): p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr case isProfileType(p, contentionzSampleTypes): p.DropFrames, p.KeepFrames = lockRxStr, "" default: p.DropFrames, p.KeepFrames = cpuProfilerRxStr, "" } } var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"} var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"} var contentionzSampleTypes = []string{"contentions", "delay"} func isProfileType(p *Profile, t []string) bool { st := p.SampleType if len(st) != len(t) { return false } for i := range st { if st[i].Type 
!= t[i] { return false } } return true } var allocRxStr = strings.Join([]string{ // POSIX entry points. `calloc`, `cfree`, `malloc`, `free`, `memalign`, `do_memalign`, `(__)?posix_memalign`, `pvalloc`, `valloc`, `realloc`, // TC malloc. `tcmalloc::.*`, `tc_calloc`, `tc_cfree`, `tc_malloc`, `tc_free`, `tc_memalign`, `tc_posix_memalign`, `tc_pvalloc`, `tc_valloc`, `tc_realloc`, `tc_new`, `tc_delete`, `tc_newarray`, `tc_deletearray`, `tc_new_nothrow`, `tc_newarray_nothrow`, // Memory-allocation routines on OS X. `malloc_zone_malloc`, `malloc_zone_calloc`, `malloc_zone_valloc`, `malloc_zone_realloc`, `malloc_zone_memalign`, `malloc_zone_free`, // Go runtime `runtime\..*`, // Other misc. memory allocation routines `BaseArena::.*`, `(::)?do_malloc_no_errno`, `(::)?do_malloc_pages`, `(::)?do_malloc`, `DoSampledAllocation`, `MallocedMemBlock::MallocedMemBlock`, `_M_allocate`, `__builtin_(vec_)?delete`, `__builtin_(vec_)?new`, `__gnu_cxx::new_allocator::allocate`, `__libc_malloc`, `__malloc_alloc_template::allocate`, `allocate`, `cpp_alloc`, `operator new(\[\])?`, `simple_alloc::allocate`, }, `|`) var allocSkipRxStr = strings.Join([]string{ // Preserve Go runtime frames that appear in the middle/bottom of // the stack. `runtime\.panic`, `runtime\.reflectcall`, `runtime\.call[0-9]*`, }, `|`) var cpuProfilerRxStr = strings.Join([]string{ `ProfileData::Add`, `ProfileData::prof_handler`, `CpuProfiler::prof_handler`, `__pthread_sighandler`, `__restore`, }, `|`) var lockRxStr = strings.Join([]string{ `RecordLockProfileData`, `(base::)?RecordLockProfileData.*`, `(base::)?SubmitMutexProfileData.*`, `(base::)?SubmitSpinLockProfileData.*`, `(Mutex::)?AwaitCommon.*`, `(Mutex::)?Unlock.*`, `(Mutex::)?UnlockSlow.*`, `(Mutex::)?ReaderUnlock.*`, `(MutexLock::)?~MutexLock.*`, `(SpinLock::)?Unlock.*`, `(SpinLock::)?SlowUnlock.*`, `(SpinLockHolder::)?~SpinLockHolder.*`, }, `|`)