# Python: Statistics Calculators

Some statistics calculators written for Python 2.x (the str.format() method requires Python >= 2.6). Of course you could just use the numpy or statistics libraries, but where’s the fun in that?

All solutions verified where possible by Wolfram Alpha or else by EasyCalculation.com.

```python
import math

# Sample data sets used by the demo calls in this file; numList2 is the
# second series passed to covariance() and correlation().
numList = [10, 12, 12, 34, 5, 25, 26, 8, 45, 55]
numList2 = [12, 45, 23, 9, 65, 67, 45, 34, 49, 22]

def mean(numList):
    """ Return the arithmetic mean of a list of numbers as a float """

    # Start from 0.0 so the running total is a float from the first addition
    runningTotal = 0.0
    for value in numList:
        runningTotal = runningTotal + value

    # An empty list divides by zero here and raises ZeroDivisionError
    return runningTotal / float(len(numList))

# Demo call: mean() returns its result instead of printing it, so the value
# is discarded here and this line produces no output.
mean(numList)

def weightedMean(numList, weights=None):
    """ Calculates the weighted mean of a list of numbers

    numList -- the values to average
    weights -- optional weights, one per value. When omitted, the weights
               are 1, 2, ..., len(numList), so later entries count more.

    Prints the result to four decimal places and also returns it.
    Raises ValueError if numList is empty, if the number of weights does
    not match the number of values, or if the weights sum to zero.
    """

    if not numList:
        raise ValueError("weightedMean() requires at least one value")

    if weights is None:
        weights = range(1, len(numList) + 1)
    elif len(weights) != len(numList):
        raise ValueError("weightedMean() requires exactly one weight per value")

    # Generate a total of all weights. sum() is used instead of reduce()
    # because reduce() is no longer a builtin on Python 3.
    totalWeight = float(sum(weights))
    if totalWeight == 0:
        raise ValueError("weightedMean() requires weights with a non-zero sum")

    # Normalize each weight by the total, then add up value * normalized weight
    total = 0
    for value, weight in zip(numList, weights):
        total += value * (weight / totalWeight)

    # Single-argument print(...) behaves the same on Python 2 and Python 3
    print("{0:.4f}".format(total))
    return total

# Demo call: prints the weighted mean of numList to four decimal places.
weightedMean(numList)

def median(numList):
    """ Calculate the median (the middle value) of a list of numbers

    Prints the median and also returns it. With an even number of entries
    the two middle values are averaged and printed to four decimal places;
    with an odd number the middle value is printed as-is.
    Raises ValueError if numList is empty.
    """

    if not numList:
        raise ValueError("median() requires at least one value")

    # Work on a sorted copy so the caller's list is left untouched
    tempList = sorted(numList)
    index = (len(tempList) - 1) // 2

    # If the set has an even number of entries, combine the middle two
    # Otherwise use the middle value
    if len(tempList) % 2 == 0:
        result = (tempList[index] + tempList[index + 1]) / 2.0
        # Single-argument print(...) behaves the same on Python 2 and Python 3
        print("{0:.4f}".format(result))
    else:
        result = tempList[index]
        print(result)

    return result

# Demo call: prints the median of numList.
median(numList)

def mode(numList):
    """ Calculates the mode (the most common value) of a list of numbers

    Prints the mode on its own line, then one "value: count" line for each
    distinct value, ordered from the highest count to the lowest. Also
    returns the mode.
    When several values tie for the highest count, the one reported depends
    on the iteration order of the counting dictionary.
    Raises ValueError (from max()) if numList is empty.
    """

    # Set up a dictionary (we prefer unique keys) to hold our counts.
    # dict.get() avoids scanning count.keys() for every number.
    count = {}
    for num in numList:
        count[num] = count.get(num, 0) + 1

    # Simply print the mode (key with largest value)
    modeValue = max(count, key=count.get)
    print(modeValue)

    # Sort the dictionary by values and print them. items() and an indexed
    # key function replace iteritems() and the tuple-unpacking lambda, which
    # only exist on Python 2.
    sortedCount = sorted(count.items(), key=lambda item: item[1], reverse=True)
    for k, v in sortedCount:
        # To print non-uniques only, skip entries where v == 1
        print("{0}: {1}".format(k, v))

    return modeValue

# Demo call: prints the mode of numList, then the count of every distinct value.
mode(numList)

def standardDeviation(numList):
    """ Return the population standard deviation of a list of numbers

    This is the square root of the mean squared distance between each
    value and the mean of the list.
    """

    # Reuse the mean() function from this file for both averaging steps
    center = float(mean(numList))

    # Squared distance of every value from the mean
    squaredDistances = [float((value - center) ** 2) for value in numList]

    # The mean of those squared distances is the variance; its root is the result
    return math.sqrt(float(mean(squaredDistances)))

# Demo call: standardDeviation() returns its result instead of printing it,
# so the value is discarded here and this line produces no output.
standardDeviation(numList)

""" Variance and standard deviation of a probability distribution """
""" Standard dev function above measures data set, not distribution """

def distributionVariance(probList):
    """ Print and return the variance and standard deviation of a distribution

    probList[i] is taken to be the probability of outcome i + 1, so a
    six-entry list describes a six-sided die.

    NOTE(review): the function header and the loops over the distribution
    were missing from this section (q and r were never bound), so they have
    been reconstructed here; the function name is a best guess.

    Prints both figures to four decimal places and returns them as a
    (variance, standardDev) tuple.
    """

    expectedValue = 0.0
    secondMoment = 0.0

    for q, r in enumerate(probList):
        # Get the expected value: sum of outcome * probability
        expectedValue += (1 + q) * r

        # Get the variance: first sum outcome squared * probability
        secondMoment += ((1 + q)**2) * r

    # Variance is E[X^2] - (E[X])^2
    variance = secondMoment - expectedValue**2
    standardDev = math.sqrt(variance)

    # Single-argument print(...) behaves the same on Python 2 and Python 3
    print("Variance: {0:.4f}".format(variance))
    print("Standard Dev: {0:.4f}".format(standardDev))

    return variance, standardDev


# Simulate a loaded six-sided die where 5 and 6 are twice as likely
# Using 1.0 to force float conversion for Python 2, else use
# "from __future__ import division"
# Python 3 automatically converts fractions to float
loadedDie = [1.0/8, 1.0/8, 1.0/8, 1.0/8, 1.0/4, 1.0/4]
#normal die
#loadedDie = [1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6]

# Demo call: prints the variance and standard deviation of the loaded die
distributionVariance(loadedDie)

def covariance(numList, numList2):
    """ Return the covariance of two lists of numbers

    Computed as the mean of the pairwise products minus the product of the
    two means. The divisor is len(numList), and numList2 is indexed by
    position for every entry of numList.
    """

    # Add up the product of each pair of values at the same position
    productSum = 0.0
    for position, value in enumerate(numList):
        productSum += value * numList2[position]

    # Mean of the products, then subtract mean1 * mean2
    meanOfProducts = productSum / len(numList)
    return meanOfProducts - float(mean(numList)) * float(mean(numList2))

# Demo call: covariance() returns its result instead of printing it, so the
# value is discarded here and this line produces no output.
covariance(numList, numList2)

def correlation(numList, numList2):
    """ Compute the (Pearson) correlation coefficient of two lists

    Uses n^2 * covariance / sqrt((n*sum(x^2) - sum(x)^2) * (n*sum(y^2) - sum(y)^2))
    with n = len(numList), relying on the covariance() function from this file.

    Prints the coefficient to four decimal places and also returns it.
    Raises ZeroDivisionError when either list has no spread (all values
    equal), because the denominator is then zero.
    """

    size = len(numList)

    sum1 = 0.0
    sum2 = 0.0
    sum1Sq = 0.0
    sum2Sq = 0.0

    # Running sum and sum of squares for each list
    for num in numList:
        sum1 += num
        sum1Sq += num**2

    for num2 in numList2:
        sum2 += num2
        sum2Sq += num2**2

    # The original expression was split across lines after a bare "/", which
    # is a syntax error; naming the denominator keeps each statement on one
    # logical line.
    denominator = math.sqrt(((size * sum1Sq) - (sum1**2)) *
                            ((size * sum2Sq) - (sum2**2)))
    coeffFinal = size**2 * covariance(numList, numList2) / denominator

    # Single-argument print(...) behaves the same on Python 2 and Python 3
    print("{0:.4f}".format(coeffFinal))
    return coeffFinal

# Demo call: prints the correlation coefficient of numList and numList2 to
# four decimal places.
correlation(numList, numList2)
``` 