Performance Optimization and Profiling: Advanced Techniques for Scala Applications
Performance optimization is crucial for building scalable, responsive Scala applications. This comprehensive lesson covers advanced profiling techniques, JVM tuning, memory management, algorithmic optimizations, and systematic approaches to identifying and resolving performance bottlenecks.
Understanding Performance Fundamentals
Performance Metrics and Measurement
import scala.util.Random
import java.util.concurrent.TimeUnit
import scala.concurrent.duration._
import java.lang.management.ManagementFactory
import scala.jdk.CollectionConverters._
// Performance measurement utilities
object PerformanceMetrics {
case class BenchmarkResult(
operation: String,
totalTime: Duration,
iterations: Long,
throughput: Double, // operations per second
averageLatency: Duration,
p95Latency: Duration,
p99Latency: Duration,
memoryUsed: Long
)
// Micro-benchmark framework
def benchmark[T](
name: String,
warmupIterations: Int = 1000,
measurementIterations: Int = 10000,
operation: () => T
): BenchmarkResult = {
// Warmup phase to trigger JIT compilation
for (_ <- 1 to warmupIterations) {
operation()
}
// Force garbage collection before measurement
System.gc()
Thread.sleep(100)
val memoryBefore = getUsedMemory()
val latencies = scala.collection.mutable.ArrayBuffer[Long]()
val startTime = System.nanoTime()
for (_ <- 1 to measurementIterations) {
val operationStart = System.nanoTime()
operation()
val operationEnd = System.nanoTime()
latencies += (operationEnd - operationStart)
}
val endTime = System.nanoTime()
val totalNanos = endTime - startTime
val memoryAfter = getUsedMemory()
// Calculate statistics
val sortedLatencies = latencies.sorted
val totalTime = Duration(totalNanos, TimeUnit.NANOSECONDS)
val throughput = measurementIterations.toDouble / (totalNanos.toDouble / 1e9)
val averageLatency = Duration(latencies.sum / latencies.length, TimeUnit.NANOSECONDS)
val p95Index = (latencies.length * 0.95).toInt
val p99Index = (latencies.length * 0.99).toInt
val p95Latency = Duration(sortedLatencies(p95Index), TimeUnit.NANOSECONDS)
val p99Latency = Duration(sortedLatencies(p99Index), TimeUnit.NANOSECONDS)
BenchmarkResult(
operation = name,
totalTime = totalTime,
iterations = measurementIterations,
throughput = throughput,
averageLatency = averageLatency,
p95Latency = p95Latency,
p99Latency = p99Latency,
memoryUsed = memoryAfter - memoryBefore
)
}
// Multi-threaded benchmark
def benchmarkConcurrent[T](
name: String,
threadCount: Int,
operationsPerThread: Int,
operation: () => T
): BenchmarkResult = {
import java.util.concurrent.{Executors, CountDownLatch, ConcurrentLinkedQueue}
val executor = Executors.newFixedThreadPool(threadCount)
val latch = new CountDownLatch(threadCount)
val latencies = new ConcurrentLinkedQueue[Long]()
// Warmup
for (_ <- 1 to 1000) operation()
System.gc()
Thread.sleep(100)
val memoryBefore = getUsedMemory()
val startTime = System.nanoTime()
// Start concurrent threads
for (_ <- 1 to threadCount) {
executor.execute(() => {
try {
for (_ <- 1 to operationsPerThread) {
val operationStart = System.nanoTime()
operation()
val operationEnd = System.nanoTime()
latencies.add(operationEnd - operationStart)
}
} finally {
latch.countDown()
}
})
}
latch.await()
val endTime = System.nanoTime()
val memoryAfter = getUsedMemory()
executor.shutdown()
// Calculate statistics
val latencyList = latencies.asScala.toVector.sorted
val totalOperations = threadCount * operationsPerThread
val totalNanos = endTime - startTime
BenchmarkResult(
operation = s"$name (${threadCount} threads)",
totalTime = Duration(totalNanos, TimeUnit.NANOSECONDS),
iterations = totalOperations,
throughput = totalOperations.toDouble / (totalNanos.toDouble / 1e9),
averageLatency = Duration(latencyList.sum / latencyList.length, TimeUnit.NANOSECONDS),
p95Latency = Duration(latencyList((latencyList.length * 0.95).toInt), TimeUnit.NANOSECONDS),
p99Latency = Duration(latencyList((latencyList.length * 0.99).toInt), TimeUnit.NANOSECONDS),
memoryUsed = memoryAfter - memoryBefore
)
}
private def getUsedMemory(): Long = {
val runtime = Runtime.getRuntime
runtime.totalMemory() - runtime.freeMemory()
}
// Memory allocation tracking
def measureAllocations[T](operation: () => T): (T, Long) = {
val memoryBefore = getUsedMemory()
System.gc()
Thread.sleep(50)
val baselineMemory = getUsedMemory()
val result = operation()
System.gc()
Thread.sleep(50)
val finalMemory = getUsedMemory()
val allocatedMemory = math.max(0, finalMemory - baselineMemory)
(result, allocatedMemory)
}
// CPU profiling utilities
def profileCpuUsage[T](operation: () => T): (T, Double) = {
val bean = ManagementFactory.getThreadMXBean
val threadId = Thread.currentThread().getId
val startCpuTime = bean.getThreadCpuTime(threadId)
val startWallTime = System.nanoTime()
val result = operation()
val endCpuTime = bean.getThreadCpuTime(threadId)
val endWallTime = System.nanoTime()
val cpuTime = endCpuTime - startCpuTime
val wallTime = endWallTime - startWallTime
val cpuUtilization = cpuTime.toDouble / wallTime.toDouble
(result, cpuUtilization)
}
}
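A minimal usage sketch of the framework above; the collection sizes and output formatting are illustrative, not part of the framework:
object BenchmarkDemo {
  import PerformanceMetrics._

  def main(args: Array[String]): Unit = {
    val listData = (1 to 100000).toList
    val vectorData = listData.toVector

    // Benchmark the same transformation over two collection types
    val listResult = benchmark("List map+filter", operation = () => listData.map(_ * 2).filter(_ % 3 == 0))
    val vectorResult = benchmark("Vector map+filter", operation = () => vectorData.map(_ * 2).filter(_ % 3 == 0))

    Seq(listResult, vectorResult).foreach { r =>
      println(f"${r.operation}%-20s ${r.throughput}%.0f ops/s, avg ${r.averageLatency.toMicros} us, p99 ${r.p99Latency.toMicros} us")
    }
  }
}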
// JVM and GC monitoring
object JVMMetrics {
case class GCStats(
gcName: String,
collectionCount: Long,
collectionTime: Long,
memoryPoolsBefore: Map[String, Long],
memoryPoolsAfter: Map[String, Long]
)
case class MemoryStats(
heapUsed: Long,
heapMax: Long,
nonHeapUsed: Long,
nonHeapMax: Long,
directMemory: Long,
pools: Map[String, (Long, Long)] // (used, max)
)
def getCurrentMemoryStats(): MemoryStats = {
val heapMemory = ManagementFactory.getMemoryMXBean.getHeapMemoryUsage
val nonHeapMemory = ManagementFactory.getMemoryMXBean.getNonHeapMemoryUsage
val memoryPools = ManagementFactory.getMemoryPoolMXBeans.asScala
val pools = memoryPools.map { pool =>
val usage = pool.getUsage
pool.getName -> (usage.getUsed, usage.getMax)
}.toMap
// Estimate direct memory from the "direct" buffer pool (approximate)
val directMemory = try {
ManagementFactory.getPlatformMXBeans(classOf[java.lang.management.BufferPoolMXBean]).asScala
.find(_.getName == "direct")
.map(_.getMemoryUsed)
.getOrElse(0L)
} catch {
case _: Exception => 0L
}
MemoryStats(
heapUsed = heapMemory.getUsed,
heapMax = heapMemory.getMax,
nonHeapUsed = nonHeapMemory.getUsed,
nonHeapMax = nonHeapMemory.getMax,
directMemory = directMemory,
pools = pools
)
}
def getGCStats(): List[GCStats] = {
import scala.jdk.CollectionConverters._
ManagementFactory.getGarbageCollectorMXBeans.asScala.map { gc =>
GCStats(
gcName = gc.getName,
collectionCount = gc.getCollectionCount,
collectionTime = gc.getCollectionTime,
memoryPoolsBefore = Map.empty, // Would need more complex tracking
memoryPoolsAfter = Map.empty
)
}.toList
}
// Monitor GC impact
def monitorGC[T](operation: () => T): (T, List[GCStats], Duration) = {
val gcBefore = getGCStats()
val startTime = System.nanoTime()
val result = operation()
val endTime = System.nanoTime()
val gcAfter = getGCStats()
val gcDelta = gcBefore.zip(gcAfter).map { case (before, after) =>
after.copy(
collectionCount = after.collectionCount - before.collectionCount,
collectionTime = after.collectionTime - before.collectionTime
)
}
val totalTime = Duration(endTime - startTime, TimeUnit.NANOSECONDS)
(result, gcDelta, totalTime)
}
}
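The GC monitor can wrap any block of work. Here is a brief sketch; the allocation loop exists only to make at least one collection likely:
object GCMonitorDemo {
  def main(args: Array[String]): Unit = {
    val (checksum, gcDelta, elapsed) = JVMMetrics.monitorGC(() => {
      // Deliberately allocation-heavy work so a GC cycle is likely during measurement
      (1 to 50).map(_ => Array.fill(1000000)(scala.util.Random.nextInt(100)).sum.toLong).sum
    })
    println(s"checksum = $checksum, wall time = ${elapsed.toMillis} ms")
    gcDelta.foreach { gc =>
      println(s"${gc.gcName}: ${gc.collectionCount} collections, ${gc.collectionTime} ms spent in GC")
    }
  }
}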
Algorithm and Data Structure Optimization
// Optimized collection operations
object OptimizedCollections {
// Efficient bulk operations
def efficientGroupBy[T, K](items: List[T], keyFunc: T => K): Map[K, List[T]] = {
val result = scala.collection.mutable.Map[K, scala.collection.mutable.ListBuffer[T]]()
items.foreach { item =>
val key = keyFunc(item)
result.getOrElseUpdate(key, scala.collection.mutable.ListBuffer.empty) += item
}
result.view.mapValues(_.toList).toMap
}
// Memory-efficient filtering and mapping
def filterMap[T, U](items: List[T])(f: T => Option[U]): List[U] = {
val result = scala.collection.mutable.ListBuffer[U]()
items.foreach { item =>
f(item).foreach(result += _)
}
result.toList
}
// Chunked processing for large datasets
def processInChunks[T, U](
items: Iterator[T],
chunkSize: Int,
processor: List[T] => List[U]
): Iterator[U] = {
items.grouped(chunkSize).flatMap(chunk => processor(chunk.toList))
}
// Lazy evaluation with memoization
class MemoizedFunction[A, B](f: A => B) {
private val cache = scala.collection.mutable.Map[A, B]()
def apply(x: A): B = cache.getOrElseUpdate(x, f(x))
def clearCache(): Unit = cache.clear()
def cacheSize: Int = cache.size
}
// Efficient set operations
object FastSetOps {
// Bit set for integers
class BitSetInt(maxValue: Int) {
private val bits = new Array[Long]((maxValue + 63) / 64)
def add(value: Int): Unit = {
val wordIndex = value / 64
val bitIndex = value % 64
bits(wordIndex) |= (1L << bitIndex)
}
def contains(value: Int): Boolean = {
val wordIndex = value / 64
val bitIndex = value % 64
(bits(wordIndex) & (1L << bitIndex)) != 0
}
def remove(value: Int): Unit = {
val wordIndex = value / 64
val bitIndex = value % 64
bits(wordIndex) &= ~(1L << bitIndex)
}
def size: Int = bits.map(java.lang.Long.bitCount).sum
}
// Bloom filter for approximate membership testing
class BloomFilter[T](capacity: Int, errorRate: Double = 0.01)(hash: T => Int) {
private val bitArraySize = (-capacity * math.log(errorRate) / (math.log(2) * math.log(2))).toInt
private val hashFunctions = (-math.log(errorRate) / math.log(2)).toInt
private val bitArray = new Array[Boolean](bitArraySize)
def add(item: T): Unit = {
val baseHash = hash(item)
for (i <- 0 until hashFunctions) {
val hashValue = math.abs((baseHash + i * 37) % bitArraySize)
bitArray(hashValue) = true
}
}
def mightContain(item: T): Boolean = {
val baseHash = hash(item)
(0 until hashFunctions).forall { i =>
val hashValue = math.abs((baseHash + i * 37) % bitArraySize)
bitArray(hashValue)
}
}
}
}
// High-performance string operations
object StringOptimizations {
// StringBuilder with initial capacity
def efficientStringBuilder(expectedLength: Int): StringBuilder =
new StringBuilder(expectedLength)
// String interning for repeated strings
private val internMap = scala.collection.mutable.Map[String, String]()
def intern(s: String): String = internMap.getOrElseUpdate(s, s)
// Fast string searching using KMP algorithm
def kmpSearch(text: String, pattern: String): List[Int] = {
if (pattern.isEmpty) return List.empty
// Build failure function
val failure = Array.ofDim[Int](pattern.length)
var j = 0
for (i <- 1 until pattern.length) {
while (j > 0 && pattern(i) != pattern(j)) {
j = failure(j - 1)
}
if (pattern(i) == pattern(j)) {
j += 1
}
failure(i) = j
}
// Search for pattern
val matches = scala.collection.mutable.ListBuffer[Int]()
j = 0
for (i <- text.indices) {
while (j > 0 && text(i) != pattern(j)) {
j = failure(j - 1)
}
if (text(i) == pattern(j)) {
j += 1
}
if (j == pattern.length) {
matches += (i - j + 1)
j = failure(j - 1)
}
}
matches.toList
}
}
}
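A short sketch of how the memoization and Bloom-filter helpers above might be exercised; the inputs and the hashCode-based hash function are illustrative choices:
object CollectionOptsDemo {
  import OptimizedCollections._

  def main(args: Array[String]): Unit = {
    // Cache results of a deliberately slow pure function
    val slowSquare = new MemoizedFunction[Int, Long](n => { Thread.sleep(10); n.toLong * n })
    println(slowSquare(12)) // computed
    println(slowSquare(12)) // served from the cache
    println(s"cache entries: ${slowSquare.cacheSize}")

    // Approximate membership test; false positives are possible, false negatives are not
    val seen = new FastSetOps.BloomFilter[String](capacity = 10000)(_.hashCode)
    List("alpha", "beta", "gamma").foreach(seen.add)
    println(seen.mightContain("beta"))  // true
    println(seen.mightContain("delta")) // false with high probability
  }
}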
// Numerical computation optimizations
object NumericalOptimizations {
// Fast mathematical operations
object FastMath {
// Fast integer square root
def fastSqrt(x: Int): Int = {
if (x == 0) return 0
var result = x
var last = (x + 1) / 2
while (last < result) {
result = last
last = (result + x / result) / 2
}
result
}
// Fast power calculation using exponentiation by squaring
def fastPow(base: Long, exponent: Int): Long = {
if (exponent == 0) return 1L
if (exponent == 1) return base
val half = fastPow(base, exponent / 2)
if (exponent % 2 == 0) {
half * half
} else {
half * half * base
}
}
// Bit manipulation utilities
def isPowerOfTwo(n: Int): Boolean = n > 0 && (n & (n - 1)) == 0
def nextPowerOfTwo(n: Int): Int = {
var power = 1
while (power < n) power <<= 1
power
}
def countSetBits(n: Int): Int = java.lang.Integer.bitCount(n)
// Fast modular arithmetic
def fastMod(dividend: Int, divisor: Int): Int = {
if (isPowerOfTwo(divisor)) {
dividend & (divisor - 1)
} else {
dividend % divisor
}
}
}
// Specialized numeric collections
class IntArray(size: Int) {
private val array = new Array[Int](size)
private var length = 0
def add(value: Int): Unit = {
if (length >= array.length) {
throw new IndexOutOfBoundsException("Array is full")
}
array(length) = value
length += 1
}
def get(index: Int): Int = {
if (index >= length) throw new IndexOutOfBoundsException()
array(index)
}
def size: Int = length
def sum: Long = {
var total = 0L
var i = 0
while (i < length) {
total += array(i)
i += 1
}
total
}
def max: Option[Int] = {
if (length == 0) None
else {
var maximum = array(0)
var i = 1
while (i < length) {
if (array(i) > maximum) maximum = array(i)
i += 1
}
Some(maximum)
}
}
def sortInPlace(): Unit = {
java.util.Arrays.sort(array, 0, length)
}
}
// Matrix operations with memory optimization
class DenseMatrix(rows: Int, cols: Int) {
private val data = new Array[Double](rows * cols)
def apply(row: Int, col: Int): Double = {
data(row * cols + col)
}
def update(row: Int, col: Int, value: Double): Unit = {
data(row * cols + col) = value
}
def multiply(other: DenseMatrix): DenseMatrix = {
require(cols == other.rows, "Matrix dimensions don't match")
val result = new DenseMatrix(rows, other.cols)
// Cache-friendly multiplication
for (i <- 0 until rows; k <- 0 until cols; j <- 0 until other.cols) {
result.data(i * other.cols + j) += data(i * cols + k) * other.data(k * other.cols + j)
}
result
}
def transpose: DenseMatrix = {
val result = new DenseMatrix(cols, rows)
for (i <- 0 until rows; j <- 0 until cols) {
result(j, i) = this(i, j)
}
result
}
}
}
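A quick sketch exercising the numeric helpers; the values are chosen only for illustration:
object NumericsDemo {
  import NumericalOptimizations._

  def main(args: Array[String]): Unit = {
    println(FastMath.fastSqrt(1000000))   // 1000
    println(FastMath.fastPow(3, 10))      // 59049
    println(FastMath.nextPowerOfTwo(100)) // 128

    // Multiply a 2x3 matrix by a 3x2 matrix using the row-major layout above
    val a = new DenseMatrix(2, 3)
    val b = new DenseMatrix(3, 2)
    for (i <- 0 until 2; j <- 0 until 3) a(i, j) = (i + j).toDouble
    for (i <- 0 until 3; j <- 0 until 2) b(i, j) = (i * j).toDouble
    val c = a.multiply(b)
    println(s"${c(0, 0)} ${c(0, 1)} | ${c(1, 0)} ${c(1, 1)}")
  }
}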
Memory Management and GC Optimization
Understanding Memory Allocation Patterns
import java.lang.management.ManagementFactory
import scala.jdk.CollectionConverters._
import scala.concurrent.duration.Duration
// Memory-efficient programming patterns
object MemoryOptimizations {
// Object pooling for frequently created objects
class ObjectPool[T](factory: () => T, reset: T => Unit, maxSize: Int = 100) {
private val pool = scala.collection.mutable.Queue[T]()
private var created = 0
def borrow(): T = {
pool.synchronized {
if (pool.nonEmpty) {
pool.dequeue()
} else {
created += 1
factory()
}
}
}
def return_(obj: T): Unit = {
pool.synchronized {
if (pool.size < maxSize) {
reset(obj)
pool.enqueue(obj)
}
}
}
def stats: (Int, Int) = pool.synchronized((created, pool.size))
}
// StringBuilder pool for string operations
val stringBuilderPool = new ObjectPool[StringBuilder](
factory = () => new StringBuilder(256),
reset = _.clear(),
maxSize = 50
)
def withStringBuilder[T](f: StringBuilder => T): T = {
val sb = stringBuilderPool.borrow()
try {
f(sb)
} finally {
stringBuilderPool.return_(sb)
}
}
// Off-heap data structures using DirectByteBuffer
class OffHeapIntArray(size: Int) {
private val buffer = java.nio.ByteBuffer.allocateDirect(size * 4)
def set(index: Int, value: Int): Unit = {
buffer.putInt(index * 4, value)
}
def get(index: Int): Int = {
buffer.getInt(index * 4)
}
def size: Int = buffer.capacity() / 4
def close(): Unit = {
// DirectByteBuffer cleanup (Java 9+)
try {
val cleanerMethod = buffer.getClass.getMethod("cleaner")
val cleaner = cleanerMethod.invoke(buffer)
val cleanMethod = cleaner.getClass.getMethod("clean")
cleanMethod.invoke(cleaner)
} catch {
case _: Exception => // Ignore cleanup errors
}
}
}
// Memory-mapped file for large datasets
class MemoryMappedArray(file: java.io.File, size: Long) {
private val channel = java.nio.channels.FileChannel.open(
file.toPath,
java.nio.file.StandardOpenOption.CREATE,
java.nio.file.StandardOpenOption.READ,
java.nio.file.StandardOpenOption.WRITE
)
private val buffer = channel.map(
java.nio.channels.FileChannel.MapMode.READ_WRITE,
0,
size
)
def putLong(index: Long, value: Long): Unit = {
// MappedByteBuffer offsets are Int, so this simple version is limited to buffers under ~2 GB
buffer.putLong((index * 8).toInt, value)
}
def getLong(index: Long): Long = {
buffer.getLong((index * 8).toInt)
}
def close(): Unit = {
channel.close()
}
}
// Flyweight pattern for immutable objects
class FlyweightFactory[T] {
private val cache = scala.collection.mutable.Map[String, T]()
def getInstance(key: String, factory: => T): T = {
cache.getOrElseUpdate(key, factory)
}
def cacheSize: Int = cache.size
def clearCache(): Unit = cache.clear()
}
// Reference management utilities
object ReferenceUtils {
import java.lang.ref.{WeakReference, SoftReference, PhantomReference, ReferenceQueue}
// Weak reference cache
class WeakCache[K, V] {
private val cache = scala.collection.mutable.Map[K, WeakReference[V]]()
def get(key: K): Option[V] = {
cache.get(key).flatMap(ref => Option(ref.get()))
}
def put(key: K, value: V): Unit = {
cache(key) = new WeakReference(value)
}
def cleanup(): Unit = {
cache.filterInPlace { case (_, ref) => ref.get() != null }
}
def size: Int = cache.size
}
// Memory-sensitive cache using SoftReference
class SoftCache[K, V] {
private val cache = scala.collection.mutable.Map[K, SoftReference[V]]()
def get(key: K): Option[V] = {
cache.get(key).flatMap(ref => Option(ref.get()))
}
def put(key: K, value: V): Unit = {
cache(key) = new SoftReference(value)
}
def cleanup(): Unit = {
cache.filterInPlace { case (_, ref) => ref.get() != null }
}
}
}
// Memory usage monitoring
def measureMemoryUsage[T](operation: => T): (T, Long) = {
System.gc()
Thread.sleep(100)
val memoryBefore = getUsedMemory()
val result = operation
System.gc()
Thread.sleep(100)
val memoryAfter = getUsedMemory()
(result, memoryAfter - memoryBefore)
}
private def getUsedMemory(): Long = {
val runtime = Runtime.getRuntime
runtime.totalMemory() - runtime.freeMemory()
}
}
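A usage sketch for the pooled StringBuilder helper; the CSV fields are illustrative:
object MemoryOptsDemo {
  import MemoryOptimizations._

  def main(args: Array[String]): Unit = {
    // Build a line with a pooled StringBuilder instead of allocating a fresh one per call
    val header = withStringBuilder { sb =>
      sb.append("id").append(',').append("name").append(',').append("score")
      sb.toString
    }
    println(header)

    val (created, idle) = stringBuilderPool.stats
    println(s"builders created: $created, currently idle in pool: $idle")
  }
}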
// Garbage collection optimization strategies
object GCOptimization {
// Allocation rate monitoring
class AllocationRateMonitor {
@volatile private var lastGcTime = System.currentTimeMillis()
@volatile private var lastAllocatedBytes = getAllocatedBytes()
def getCurrentAllocationRate(): Double = {
val currentTime = System.currentTimeMillis()
val currentAllocated = getAllocatedBytes()
val timeDelta = currentTime - lastGcTime
val bytesDelta = currentAllocated - lastAllocatedBytes
lastGcTime = currentTime
lastAllocatedBytes = currentAllocated
if (timeDelta > 0) bytesDelta.toDouble / timeDelta else 0.0
}
private def getAllocatedBytes(): Long = {
ManagementFactory.getMemoryPoolMXBeans.asScala
.filter(_.getType == java.lang.management.MemoryType.HEAP)
.map(_.getUsage.getUsed)
.sum
}
}
// GC-friendly programming patterns
object GCFriendlyPatterns {
// Reuse collections instead of creating new ones
def processDataGCFriendly[T](data: List[T], processor: T => T): List[T] = {
val result = scala.collection.mutable.ListBuffer[T]()
result.sizeHint(data.length)
data.foreach { item =>
result += processor(item)
}
result.toList
}
// Minimize boxing/unboxing
def sumIntsEfficiently(numbers: Array[Int]): Long = {
var sum = 0L
var i = 0
while (i < numbers.length) {
sum += numbers(i)
i += 1
}
sum
}
// Use primitive collections when possible
import scala.collection.mutable.ArrayBuffer
def efficientIntProcessing(size: Int): ArrayBuffer[Int] = {
val buffer = new ArrayBuffer[Int](size)
var i = 0
while (i < size) {
buffer += i * 2
i += 1
}
buffer
}
// Streaming processing to reduce memory pressure
def processLargeDataset[T, U](
source: Iterator[T],
batchSize: Int,
processor: List[T] => List[U]
): Iterator[U] = {
source.grouped(batchSize).flatMap { batch =>
processor(batch.toList)
}
}
}
// JVM tuning recommendations
object JVMTuning {
case class GCTuningRecommendations(
heapSize: String,
gcAlgorithm: String,
additionalFlags: List[String],
reasoning: String
)
def recommendGCSettings(
applicationProfile: String,
expectedHeapUsage: Long, // expected heap requirement in megabytes
latencyRequirements: Duration
): GCTuningRecommendations = {
applicationProfile.toLowerCase match {
case "low-latency" =>
GCTuningRecommendations(
heapSize = s"-Xmx${expectedHeapUsage * 2}m",
gcAlgorithm = "-XX:+UseZGC",
additionalFlags = List(
"-XX:+UnlockExperimentalVMOptions",
"-XX:+UseTransparentHugePages",
"-XX:+UncommitMemory"
),
reasoning = "ZGC provides consistent low latency with minimal pause times"
)
case "high-throughput" =>
GCTuningRecommendations(
heapSize = s"-Xmx${expectedHeapUsage}m",
gcAlgorithm = "-XX:+UseParallelGC",
additionalFlags = List(
"-XX:+UseAdaptiveSizePolicy",
"-XX:MaxGCPauseMillis=200",
"-XX:GCTimeRatio=19"
),
reasoning = "Parallel GC optimizes for maximum throughput"
)
case "balanced" =>
GCTuningRecommendations(
heapSize = s"-Xmx${expectedHeapUsage}m",
gcAlgorithm = "-XX:+UseG1GC",
additionalFlags = List(
"-XX:MaxGCPauseMillis=100",
"-XX:G1HeapRegionSize=16m",
"-XX:+G1UseAdaptiveIHOP"
),
reasoning = "G1GC provides good balance between latency and throughput"
)
case _ =>
GCTuningRecommendations(
heapSize = s"-Xmx${expectedHeapUsage}m",
gcAlgorithm = "-XX:+UseG1GC",
additionalFlags = List("-XX:MaxGCPauseMillis=100"),
reasoning = "Default G1GC settings for general purpose applications"
)
}
}
}
}
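The tuning helper can be driven like this; the heap size and latency target below are illustrative, and the printed flags are suggestions to validate against your JVM version:
object TuningDemo {
  import GCOptimization.JVMTuning._
  import scala.concurrent.duration._

  def main(args: Array[String]): Unit = {
    // A latency-sensitive service with roughly 4 GB of live data (values are examples only)
    val rec = recommendGCSettings(
      applicationProfile = "low-latency",
      expectedHeapUsage = 4096, // MB
      latencyRequirements = 10.millis
    )
    println(s"${rec.heapSize} ${rec.gcAlgorithm} ${rec.additionalFlags.mkString(" ")}")
    println(rec.reasoning)
  }
}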
Profiling Tools and Techniques
Application Performance Monitoring
import java.lang.management.ManagementFactory
import scala.concurrent.duration.Duration
import scala.jdk.CollectionConverters._
// Comprehensive application profiler
class ApplicationProfiler {
case class ProfilerSession(
sessionId: String,
startTime: Long,
endTime: Option[Long] = None,
measurements: List[Measurement] = List.empty
)
sealed trait Measurement {
def timestamp: Long
def category: String
}
case class MethodInvocation(
timestamp: Long,
className: String,
methodName: String,
duration: Long,
memoryAllocated: Long
) extends Measurement {
val category = "method"
}
case class GCEvent(
timestamp: Long,
gcName: String,
duration: Long,
memoryBefore: Long,
memoryAfter: Long
) extends Measurement {
val category = "gc"
}
case class ThreadSnapshot(
timestamp: Long,
threadCount: Int,
blockedThreads: Int,
waitingThreads: Int,
cpuUsage: Double
) extends Measurement {
val category = "threads"
}
private var currentSession: Option[ProfilerSession] = None
private val measurements = scala.collection.mutable.ListBuffer[Measurement]()
def startSession(sessionId: String): Unit = {
currentSession = Some(ProfilerSession(sessionId, System.currentTimeMillis()))
measurements.clear()
}
def endSession(): Option[ProfilerSession] = {
currentSession.map { session =>
val endedSession = session.copy(
endTime = Some(System.currentTimeMillis()),
measurements = measurements.toList
)
currentSession = None
endedSession
}
}
// Method-level profiling using bytecode instrumentation (conceptual)
def profileMethod[T](className: String, methodName: String)(operation: => T): T = {
val startTime = System.nanoTime()
val memoryBefore = getUsedMemory()
val result = operation
val endTime = System.nanoTime()
val memoryAfter = getUsedMemory()
val measurement = MethodInvocation(
timestamp = System.currentTimeMillis(),
className = className,
methodName = methodName,
duration = endTime - startTime,
memoryAllocated = memoryAfter - memoryBefore
)
measurements += measurement
result
}
// Continuous monitoring
def startContinuousMonitoring(intervalMs: Long = 1000): Unit = {
val monitoring = new Thread(() => {
while (currentSession.isDefined) {
captureSystemSnapshot()
Thread.sleep(intervalMs)
}
})
monitoring.setDaemon(true)
monitoring.start()
}
private def captureSystemSnapshot(): Unit = {
// GC monitoring
captureGCEvents()
// Thread monitoring
captureThreadSnapshot()
// Memory monitoring
captureMemorySnapshot()
}
private def captureGCEvents(): Unit = {
val gcBeans = ManagementFactory.getGarbageCollectorMXBeans.asScala
gcBeans.foreach { gc =>
val collectionCount = gc.getCollectionCount
val collectionTime = gc.getCollectionTime
// In a real implementation, we would track deltas
if (collectionCount > 0) {
val gcEvent = GCEvent(
timestamp = System.currentTimeMillis(),
gcName = gc.getName,
duration = collectionTime,
memoryBefore = 0, // Would track actual values
memoryAfter = 0
)
measurements += gcEvent
}
}
}
private def captureThreadSnapshot(): Unit = {
val threadBean = ManagementFactory.getThreadMXBean
val threadInfo = threadBean.dumpAllThreads(false, false)
val blockedCount = threadInfo.count(_.getThreadState == Thread.State.BLOCKED)
val waitingCount = threadInfo.count(t =>
t.getThreadState == Thread.State.WAITING || t.getThreadState == Thread.State.TIMED_WAITING
)
val cpuUsage = getCpuUsage()
val snapshot = ThreadSnapshot(
timestamp = System.currentTimeMillis(),
threadCount = threadInfo.length,
blockedThreads = blockedCount,
waitingThreads = waitingCount,
cpuUsage = cpuUsage
)
measurements += snapshot
}
private def captureMemorySnapshot(): Unit = {
// Memory monitoring implementation
}
private def getUsedMemory(): Long = {
val runtime = Runtime.getRuntime
runtime.totalMemory() - runtime.freeMemory()
}
private def getCpuUsage(): Double = {
val osBean = ManagementFactory.getOperatingSystemMXBean
osBean match {
case sunBean: com.sun.management.OperatingSystemMXBean =>
sunBean.getProcessCpuLoad
case _ => 0.0
}
}
// Analysis and reporting
def analyzeSession(session: ProfilerSession): PerformanceReport = {
val methodMeasurements = session.measurements.collect { case m: MethodInvocation => m }
val gcMeasurements = session.measurements.collect { case m: GCEvent => m }
val threadMeasurements = session.measurements.collect { case m: ThreadSnapshot => m }
val hotspots = findHotspots(methodMeasurements)
val memoryLeaks = detectMemoryLeaks(methodMeasurements)
val gcAnalysis = analyzeGC(gcMeasurements)
val threadAnalysis = analyzeThreads(threadMeasurements)
PerformanceReport(
sessionId = session.sessionId,
duration = session.endTime.getOrElse(System.currentTimeMillis()) - session.startTime,
hotspots = hotspots,
memoryLeaks = memoryLeaks,
gcAnalysis = gcAnalysis,
threadAnalysis = threadAnalysis
)
}
private def findHotspots(methods: List[MethodInvocation]): List[Hotspot] = {
methods.groupBy(m => s"${m.className}.${m.methodName}")
.map { case (methodName, invocations) =>
Hotspot(
methodName = methodName,
totalTime = invocations.map(_.duration).sum,
invocationCount = invocations.length,
averageTime = invocations.map(_.duration).sum / invocations.length,
totalMemory = invocations.map(_.memoryAllocated).sum
)
}
.toList
.sortBy(_.totalTime)
.reverse
.take(10)
}
private def detectMemoryLeaks(methods: List[MethodInvocation]): List[MemoryLeak] = {
// Simplified memory leak detection
List.empty
}
private def analyzeGC(gcEvents: List[GCEvent]): GCAnalysis = {
val totalGCTime = gcEvents.map(_.duration).sum
val gcCount = gcEvents.length
val averageGCTime = if (gcCount > 0) totalGCTime / gcCount else 0
GCAnalysis(
totalGCTime = totalGCTime,
gcCount = gcCount,
averageGCTime = averageGCTime,
maxGCTime = gcEvents.map(_.duration).maxOption.getOrElse(0)
)
}
private def analyzeThreads(threadSnapshots: List[ThreadSnapshot]): ThreadAnalysis = {
val maxThreads = threadSnapshots.map(_.threadCount).maxOption.getOrElse(0)
val avgCpuUsage = if (threadSnapshots.nonEmpty) threadSnapshots.map(_.cpuUsage).sum / threadSnapshots.length else 0.0
val maxBlockedThreads = threadSnapshots.map(_.blockedThreads).maxOption.getOrElse(0)
ThreadAnalysis(
maxThreadCount = maxThreads,
averageCpuUsage = avgCpuUsage,
maxBlockedThreads = maxBlockedThreads
)
}
}
case class PerformanceReport(
sessionId: String,
duration: Long,
hotspots: List[Hotspot],
memoryLeaks: List[MemoryLeak],
gcAnalysis: GCAnalysis,
threadAnalysis: ThreadAnalysis
)
case class Hotspot(
methodName: String,
totalTime: Long,
invocationCount: Int,
averageTime: Long,
totalMemory: Long
)
case class MemoryLeak(
location: String,
suspectedObjects: List[String],
growthRate: Double
)
case class GCAnalysis(
totalGCTime: Long,
gcCount: Int,
averageGCTime: Long,
maxGCTime: Long
)
case class ThreadAnalysis(
maxThreadCount: Int,
averageCpuUsage: Double,
maxBlockedThreads: Int
)
// Integration with external profiling tools
object ExternalProfilers {
// JProfiler integration
object JProfilerIntegration {
def startCPUProfiling(): Unit = {
// JProfiler API calls (when available)
try {
val controller = Class.forName("com.jprofiler.api.agent.Controller")
val startCPURecording = controller.getMethod("startCPURecording", classOf[Boolean])
startCPURecording.invoke(null, java.lang.Boolean.TRUE)
} catch {
case _: Exception => println("JProfiler not available")
}
}
def stopCPUProfiling(): Unit = {
try {
val controller = Class.forName("com.jprofiler.api.agent.Controller")
val stopCPURecording = controller.getMethod("stopCPURecording")
stopCPURecording.invoke(null)
} catch {
case _: Exception => // Ignore
}
}
}
// JFR (Java Flight Recorder) integration
object JFRIntegration {
def startRecording(duration: Duration, filename: String): Unit = {
val command = s"jcmd ${ProcessHandle.current().pid()} JFR.start duration=${duration.toSeconds}s filename=$filename"
Runtime.getRuntime.exec(command)
}
def stopRecording(): Unit = {
val command = s"jcmd ${ProcessHandle.current().pid()} JFR.stop"
Runtime.getRuntime.exec(command)
}
}
// Async Profiler integration
object AsyncProfilerIntegration {
def profile(duration: Duration, event: String = "cpu"): Unit = {
// Assumes the async-profiler launcher (asprof, or profiler.sh in older releases) is on the PATH
val pid = ProcessHandle.current().pid()
val command = s"asprof -e $event -d ${duration.toSeconds} $pid"
Runtime.getRuntime.exec(command)
}
}
}
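Bringing the pieces together, here is a sketch of a profiling session end to end; DemoService and parseRecords are just labels for the report, not real classes:
object ProfilerDemo {
  def main(args: Array[String]): Unit = {
    val profiler = new ApplicationProfiler
    profiler.startSession("demo-session")
    profiler.startContinuousMonitoring(intervalMs = 500)

    // Wrap the code under investigation; the class/method names only label the measurement
    val matches = profiler.profileMethod("DemoService", "parseRecords") {
      (1 to 100000).map(i => s"record-$i").count(_.endsWith("7"))
    }
    println(s"matching records: $matches")

    profiler.endSession().foreach { session =>
      val report = profiler.analyzeSession(session)
      report.hotspots.foreach { h =>
        println(s"${h.methodName}: ${h.totalTime / 1000000} ms total over ${h.invocationCount} calls")
      }
    }
  }
}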
Conclusion
Performance optimization in Scala requires a systematic approach combining measurement, analysis, and targeted improvements. Key principles include:
Measurement Foundation:
- Establish baseline performance metrics
- Use micro-benchmarks for specific operations
- Monitor production performance continuously
- Profile memory allocation and GC behavior
Optimization Strategies:
- Algorithm and data structure optimization
- Memory management and allocation reduction
- JVM tuning and garbage collection optimization
- Concurrent programming optimizations
Profiling Techniques:
- Method-level performance monitoring
- Memory allocation tracking
- GC event analysis
- Thread contention detection
Tools and Frameworks:
- Built-in JVM monitoring capabilities
- External profilers (JProfiler, VisualVM, Async Profiler)
- Java Flight Recorder for production monitoring
- Custom profiling frameworks for specific needs
Best Practices:
- Measure before optimizing
- Focus on hotspots and critical paths
- Consider memory allocation patterns
- Balance between different optimization goals
- Test optimizations under realistic conditions
Common Pitfalls to Avoid:
- Premature optimization without measurement
- Optimizing non-critical code paths
- Ignoring memory allocation impact
- Over-optimization leading to complex code
- Not considering concurrent access patterns
Production Considerations:
- Monitor key performance indicators
- Set up alerting for performance degradation
- Regular performance regression testing
- Capacity planning based on performance characteristics
- A/B testing for optimization effectiveness
Performance optimization is an iterative process that requires careful measurement, analysis, and validation. The goal is to build applications that are not only correct but also efficient, scalable, and maintainable. Modern Scala applications can achieve excellent performance when these principles are applied systematically.