Statistics - Sampling Distribution
Table of Contents
1 - About
The distribution of a statistic estimated from different samples (of the same size) drawn from the same population is called a sampling distribution.
It makes it possible to make probability judgements about samples.
Because of the central limit theorem, the sampling distribution of the mean is known to be approximately normal (for sufficiently large samples). Sampling distributions are therefore fundamental to inferential statistics because they allow for probabilistic predictions about outcomes.
2 - Articles Related
3 - Demonstration
The code below showcases the fact that a sampling distribution created from the means of many samples drawn from the same population has a normal shape.
/**
 * A helper function to draw a histogram as an SVG chart with d3.
 *
 * All working variables are local: nothing is written to the global scope,
 * so the function also works in strict mode / ES modules.
 *
 * @param {Object} params
 * @param {string} params.selector - id (without the leading "#") of the
 *   element the SVG is appended to.
 * @param {Array} params.data - values to bin; they are passed to
 *   d3.min / d3.max and mapped through a linear scale, so numeric values
 *   are expected.
 */
function histogram(params) {
  var selector = params.selector;
  var data = params.data;

  // Extent of the data: it becomes the domain of the x axis
  var min = d3.min(data);
  var max = d3.max(data);

  // Graphics data: outer size is 460 x 400, the margins leave room for the axes
  var margin = { top: 30, right: 30, bottom: 30, left: 50 };
  var width = 460 - margin.left - margin.right;
  var height = 400 - margin.top - margin.bottom;

  // The requested number of bins (the x ticks decide the final count)
  var binCount = 20;

  // X axis: maps the data extent to the drawing width.
  // Its ticks are reused below as the thresholds/breaks of the histogram.
  var x = d3
    .scaleLinear()
    .domain([min, max])
    .range([0, width]);

  // Histogram generator: same domain as the x axis, thresholds on the x ticks
  var binGenerator = d3
    .histogram()
    .domain(x.domain())
    .thresholds(x.ticks(binCount));

  // Apply the generator to the data to get the bins
  var bins = binGenerator(data);

  // Append the svg object to the target element and shift the drawing
  // group by the left/top margins
  var svg = d3
    .select("#" + selector)
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

  // Add the x axis at the bottom of the drawing area
  svg
    .append("g")
    .attr("transform", "translate(0," + height + ")")
    .call(d3.axisBottom(x));

  // Y axis: from 0 to the size of the fullest bin (range is inverted
  // because SVG y coordinates grow downwards)
  var y = d3
    .scaleLinear()
    .range([height, 0])
    .domain([
      0,
      d3.max(bins, function(d) {
        return d.length;
      })
    ]);
  svg.append("g").call(d3.axisLeft(y));

  // Append one bar rectangle per bin to the svg element
  svg
    .selectAll("rect")
    .data(bins)
    .enter()
    .append("rect")
    .attr("x", 1)
    .attr("transform", function(d) {
      return "translate(" + x(d.x0) + "," + y(d.length) + ")";
    })
    .attr("width", function(d) {
      // 1px gap between bars; clamp so that a zero-width bin does not
      // produce a negative (invalid) SVG width
      return Math.max(0, x(d.x1) - x(d.x0) - 1);
    })
    .attr("height", function(d) {
      return height - y(d.length);
    })
    .style("fill", "#69b3a2");
}
- Creating the population data: 10000 random integers uniformly distributed between 0 and 99
// Population: population_n random integers in [0, population_max).
// The variables are declared with `var` so they are no longer implicit
// globals (which throw in strict mode) while remaining visible to the
// following script sections.
var population_n = 10000;
var population_max = 100;
var population_data = [];
for (var i = 0; i < population_n; i++) {
  // Math.random() is in [0, 1): the result is an integer from 0 to population_max - 1
  var random_value = Math.floor(Math.random() * population_max);
  population_data.push(random_value);
}
// Draw the population distribution into the element with id "population"
histogram({ selector: "population", data: population_data });
- Sampling the population 1000 times with a sample size of 20, calculating the mean of each sample and adding it to the sampling distribution
// Sampling distribution of the mean:
// draw sample_distribution_n samples of sample_n values each (with
// replacement) from the population and keep the mean of every sample.
var sample_distribution_n = 1000; // number of samples
var sample_n = 20; // size of each sample
var sample_distribution_data = [];
for (var j = 0; j < sample_distribution_n; j++) {
  var sample_data = [];
  for (var i = 0; i < sample_n; i++) {
    // The index must range over the whole population. Using the maximum
    // *value* (population_max) as the bound would only ever pick from the
    // first 100 population members.
    var population_random_index = Math.floor(
      Math.random() * population_data.length
    );
    sample_data.push(population_data[population_random_index]);
  }
  sample_distribution_data.push(d3.mean(sample_data));
}
// Draw the distribution of the sample means into the element with id "sample"
histogram({ selector: "sample", data: sample_distribution_data });
<h1>The Population Distribution</h1>
<p>The population was generated with random data</p>
<div id="population"></div>
<h1>The Sample Distribution (Distribution of the sample mean)</h1>
<p>The sampling distribution created from the means of 1000 samples is approximately normal, as stated by the Central Limit Theorem</p>
<div id="sample"></div>