# 295. Find Median from Data Stream

## Description

The median is the middle value in an ordered integer list. If the size of the list is even, there is no middle value, and the median is the mean of the two middle values.

• For example, for arr = [2,3,4], the median is 3.
• For example, for arr = [2,3], the median is (2 + 3) / 2 = 2.5.

Implement the MedianFinder class:

• MedianFinder() initializes the MedianFinder object.
• void addNum(int num) adds the integer num from the data stream to the data structure.
• double findMedian() returns the median of all elements so far. Answers within 10-5 of the actual answer will be accepted.

Example 1:

Input
[[], [1], [2], [], [3], []]
Output
[null, null, null, 1.5, null, 2.0]

Explanation
MedianFinder medianFinder = new MedianFinder();
medianFinder.addNum(2);    // arr = [1, 2]
medianFinder.findMedian(); // return 1.5 (i.e., (1 + 2) / 2)
medianFinder.findMedian(); // return 2.0


Constraints:

• -105 <= num <= 105
• There will be at least one element in the data structure before calling findMedian.
• At most 5 * 104 calls will be made to addNum and findMedian.

• If all integer numbers from the stream are in the range [0, 100], how would you optimize your solution?
• If 99% of all integer numbers from the stream are in the range [0, 100], how would you optimize your solution?

## Solutions

Since the data in the data stream is not in order, we should first think of a way to make it in ordered. If we use a vector to save the data stream, we must sort the array every time a new data comes in, which is not efficient.

Use large and small heaps to solve the problem, where the large heap holds the larger numbers in the right half, and the small heap holds the smaller numbers in the left half. In this way, the entire array is divided into two sections in the middle. Since the heap is saved from large to small, we hope that the data in the large pile is from small to large, so it is convenient to take the first one to calculate the median.

##### Q-1: If all integer numbers from the stream are between 0 and 100, how would you optimize it?
• We can maintain an integer array of length 100 to store the count of each number along with a total count. Then, we can iterate over the array to find the middle value to get our median. Time and space complexity would be O(100) = O(1).
##### Q-2: If 99% of all integer numbers from the stream are between 0 and 100, how would you optimize it?
• In this case, we can keep a counter for lessThanHundred and greaterThanHundred. As we know the solution will be definitely in 0-100 we don’t need to know those number which are >100 or <0, only count of them will be enough.
• class MedianFinder {
private PriorityQueue<Integer> q1 = new PriorityQueue<>();
private PriorityQueue<Integer> q2 = new PriorityQueue<>(Collections.reverseOrder());

/** initialize your data structure here. */
public MedianFinder() {
}

q1.offer(num);
q2.offer(q1.poll());
if (q2.size() - q1.size() > 1) {
q1.offer(q2.poll());
}
}

public double findMedian() {
if (q2.size() > q1.size()) {
return q2.peek();
}
return (q1.peek() + q2.peek()) * 1.0 / 2;
}
}

/**
* Your MedianFinder object will be instantiated and called as such:
* MedianFinder obj = new MedianFinder();
* double param_2 = obj.findMedian();
*/

• class MedianFinder {
public:
/** initialize your data structure here. */
MedianFinder() {
}

q1.push(num);
q2.push(q1.top());
q1.pop();
if (q2.size() - q1.size() > 1) {
q1.push(q2.top());
q2.pop();
}
}

double findMedian() {
if (q2.size() > q1.size()) {
return q2.top();
}
return (double) (q1.top() + q2.top()) / 2;
}

private:
priority_queue<int, vector<int>, greater<int>> q1;
priority_queue<int> q2;
};

/**
* Your MedianFinder object will be instantiated and called as such:
* MedianFinder* obj = new MedianFinder();
* double param_2 = obj->findMedian();
*/

• from heapq import heappush, heappop

class MedianFinder:

def __init__(self):
# the smaller half of the list, max heap (invert min-heap)
self.smallerHalf = []
# the larger half of the list, min heap
self.largerHalf = []

def addNum(self, num: int) -> None:

# trick for smaller half, use -1*val
# heapq in python does NOT have comparator like in Java
heappush(self.smallerHalf, -num)
heappush(self.largerHalf, -heappop(self.smallerHalf)) # note: not self.smallerHalf.pop()

if len(self.smallerHalf) < len(self.largerHalf):
heappush(self.smallerHalf, -heappop(self.largerHalf))

def findMedian(self) -> float:
if len(self.smallerHalf) == len(self.largerHalf):
return (-self.smallerHalf[0] + self.largerHalf[0]) / 2.0
else:
return float(-self.smallerHalf[0])

# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# param_2 = obj.findMedian()

class MedianFinder:
def __init__(self):
self.counts = [0] * 101
self.total = 0

def addNum(self, num: int) -> None:
self.counts[num] += 1
self.total += 1

def findMedian(self) -> float:
if self.total % 2 == 0:
# even number of elements
middle1 = self.total // 2
middle2 = middle1 + 1
count = 0
i1 = i2 = 0
for i in range(101):
count += self.counts[i]
if count >= middle1 and i1 == 0:
i1 = i
if count >= middle2:
i2 = i
break
return (i1 + i2) / 2
else:
# odd number of elements
middle = self.total // 2 + 1
count = 0
for i in range(101):
count += self.counts[i]
if count >= middle:
return i

############

class MedianFinder:
def __init__(self):
"""
"""
self.h1 = []
self.h2 = []

def addNum(self, num: int) -> None:
heappush(self.h1, num)
heappush(self.h2, -heappop(self.h1))
if len(self.h2) - len(self.h1) > 1:
heappush(self.h1, -heappop(self.h2))

def findMedian(self) -> float:
if len(self.h2) > len(self.h1):
return -self.h2[0]
return (self.h1[0] - self.h2[0]) / 2

# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# param_2 = obj.findMedian()


• type MedianFinder struct {
q1 hp
q2 hp
}

/** initialize your data structure here. */
func Constructor() MedianFinder {
return MedianFinder{hp{}, hp{} }
}

func (this *MedianFinder) AddNum(num int) {
heap.Push(&this.q1, num)
heap.Push(&this.q2, -heap.Pop(&this.q1).(int))
if this.q2.Len()-this.q1.Len() > 1 {
heap.Push(&this.q1, -heap.Pop(&this.q2).(int))
}
}

func (this *MedianFinder) FindMedian() float64 {
if this.q2.Len() > this.q1.Len() {
return -float64(this.q2.IntSlice[0])
}
return float64(this.q1.IntSlice[0]-this.q2.IntSlice[0]) / 2.0
}

/**
* Your MedianFinder object will be instantiated and called as such:
* obj := Constructor();
* param_2 := obj.FindMedian();
*/

type hp struct{ sort.IntSlice }

func (h hp) Less(i, j int) bool { return h.IntSlice[i] < h.IntSlice[j] }
func (h *hp) Push(v any)        { h.IntSlice = append(h.IntSlice, v.(int)) }
func (h *hp) Pop() any {
a := h.IntSlice
v := a[len(a)-1]
h.IntSlice = a[:len(a)-1]
return v
}

• class MedianFinder {
private nums: number[];

constructor() {
this.nums = [];
}

const { nums } = this;
let l = 0;
let r = nums.length;
while (l < r) {
const mid = (l + r) >>> 1;
if (nums[mid] < num) {
l = mid + 1;
} else {
r = mid;
}
}
nums.splice(l, 0, num);
}

findMedian(): number {
const { nums } = this;
const n = nums.length;
if ((n & 1) === 1) {
return nums[n >> 1];
}
return (nums[n >> 1] + nums[(n >> 1) - 1]) / 2;
}
}

/**
* Your MedianFinder object will be instantiated and called as such:
* var obj = new MedianFinder()
* var param_2 = obj.findMedian()
*/


• /**
* initialize your data structure here.
*/
var MedianFinder = function () {
this.val = [];
};

/**
* @param {number} num
* @return {void}
*/
let left = 0;
let right = this.val.length;
while (left < right) {
let mid = left + ~~((right - left) / 2);
if (num > this.val[mid]) {
left = mid + 1;
} else {
right = mid;
}
}
this.val.splice(left, 0, num);
};

/**
* @return {number}
*/
MedianFinder.prototype.findMedian = function () {
let mid = ~~(this.val.length / 2);
return this.val.length % 2 ? this.val[mid] : (this.val[mid - 1] + this.val[mid]) / 2;
};


• public class MedianFinder {
private List<int> nums;
private int curIndex;

/** initialize your data structure here. */
public MedianFinder() {
nums = new List<int>();
}

private int FindIndex(int val) {
int left = 0;
int right = nums.Count - 1;
while (left <= right) {
int mid = left + (right - left) / 2;
if (val > nums[mid]) {
left = mid + 1;
} else {
right = mid - 1;
}
}
return left;
}

if (nums.Count == 0) {
curIndex = 0;
} else {
curIndex = FindIndex(num);
if (curIndex == nums.Count) {
} else {
nums.Insert(curIndex, num);
}
}
}

public double FindMedian() {
if (nums.Count % 2 == 1) {
return (double)nums[nums.Count / 2];
} else {
if (nums.Count == 0) {
return 0;
} else {
return (double) (nums[nums.Count / 2 - 1] + nums[nums.Count / 2]) / 2;
}
}
}
}

/**
* Your MedianFinder object will be instantiated and called as such:
* MedianFinder obj = new MedianFinder();
* double param_2 = obj.FindMedian();
*/

• struct MedianFinder {
nums: Vec<i32>,
}

/**
* &self means the method takes an immutable reference.
* If you need a mutable reference, change it to &mut self instead.
*/
impl MedianFinder {
/** initialize your data structure here. */
fn new() -> Self {
Self { nums: Vec::new() }
}

fn add_num(&mut self, num: i32) {
let mut l = 0;
let mut r = self.nums.len();
while l < r {
let mid = (l + r) >> 1;
if self.nums[mid] < num {
l = mid + 1;
} else {
r = mid;
}
}
self.nums.insert(l, num);
}

fn find_median(&self) -> f64 {
let n = self.nums.len();
if (n & 1) == 1 {
return f64::from(self.nums[n >> 1]);
}
f64::from(self.nums[n >> 1] + self.nums[(n >> 1) - 1]) / 2.0
}
}/**
* Your MedianFinder object will be instantiated and called as such:
* let obj = MedianFinder::new();
* let ret_2: f64 = obj.find_median();
*/


• class MedianFinder {
private var minQ = Heap<Int>(sort: <)
private var maxQ = Heap<Int>(sort: >)

init() {
}

maxQ.insert(num)
minQ.insert(maxQ.remove()!)
if maxQ.count < minQ.count {
maxQ.insert(minQ.remove()!)
}
}

func findMedian() -> Double {
if maxQ.count > minQ.count {
return Double(maxQ.peek()!)
}
return (Double(maxQ.peek()!) + Double(minQ.peek()!)) / 2.0
}
}

struct Heap<T> {
var elements: [T]
let sort: (T, T) -> Bool

init(sort: @escaping (T, T) -> Bool, elements: [T] = []) {
self.sort = sort
self.elements = elements
if !elements.isEmpty {
for i in stride(from: elements.count / 2 - 1, through: 0, by: -1) {
siftDown(from: i)
}
}
}

var isEmpty: Bool {
return elements.isEmpty
}

var count: Int {
return elements.count
}

func peek() -> T? {
return elements.first
}

mutating func insert(_ value: T) {
elements.append(value)
siftUp(from: elements.count - 1)
}

mutating func remove() -> T? {
guard !elements.isEmpty else { return nil }
elements.swapAt(0, elements.count - 1)
let removedValue = elements.removeLast()
siftDown(from: 0)
return removedValue
}

private mutating func siftUp(from index: Int) {
var child = index
var parent = parentIndex(ofChildAt: child)
while child > 0 && sort(elements[child], elements[parent]) {
elements.swapAt(child, parent)
child = parent
parent = parentIndex(ofChildAt: child)
}
}

private mutating func siftDown(from index: Int) {
var parent = index
while true {
let left = leftChildIndex(ofParentAt: parent)
let right = rightChildIndex(ofParentAt: parent)
var candidate = parent
if left < count && sort(elements[left], elements[candidate]) {
candidate = left
}
if right < count && sort(elements[right], elements[candidate]) {
candidate = right
}
if candidate == parent {
return
}
elements.swapAt(parent, candidate)
parent = candidate
}
}

private func parentIndex(ofChildAt index: Int) -> Int {
return (index - 1) / 2
}

private func leftChildIndex(ofParentAt index: Int) -> Int {
return 2 * index + 1
}

private func rightChildIndex(ofParentAt index: Int) -> Int {
return 2 * index + 2
}
}

/**
* Your MedianFinder object will be instantiated and called as such:
* let obj = MedianFinder()
* let ret_2: Double = obj.findMedian()
*/