A collection of statistics calculators written for Python 2.x (the str.format() method requires Python >= 2.6). Of course you could just use NumPy or the standard-library statistics module, but where’s the fun in that?
All results were verified against Wolfram Alpha where possible, and otherwise against EasyCalculation.com.
import math
# Sample data set used by the example calls that follow each function
numList = [10, 12, 12, 34, 5, 25, 26, 8, 45, 55]
# Second sample of the same length, paired with numList in the two-variable
# examples (covariance and correlation)
numList2 = [12, 45, 23, 9, 65, 67, 45, 34, 49, 22]
def mean(numList):
    """Return the arithmetic mean of a list of numbers as a float.

    Raises ZeroDivisionError when numList is empty.
    """
    itemCount = float(len(numList))
    # Accumulate from 0.0 so the running sum is always a float
    runningSum = 0.0
    for value in numList:
        runningSum = runningSum + value
    return runningSum / itemCount
# Example call; mean() returns its result without printing it, so this line
# displays nothing when the file is run as a script
mean(numList)
def weightedMean(numList):
    """Print and return the linearly weighted mean of a list of numbers.

    The value at position i (0-based) is given weight i + 1, so later
    entries count more. Weights are normalised to sum to 1 before use.

    Args:
        numList: non-empty sequence of numbers.

    Returns:
        The weighted mean as a float. The same value is printed with
        four decimal places.

    Raises:
        ValueError: if numList is empty.
    """
    if not numList:
        raise ValueError("weightedMean() requires at least one value")
    # Weights 1, 2, ..., n matching the positions in numList
    weights = range(1, len(numList) + 1)
    # float() forces true division under Python 2
    totalWeight = float(sum(weights))
    # Multiply each value by its normalised weight and add the results
    total = sum(value * (weight / totalWeight)
                for value, weight in zip(numList, weights))
    print("{0:.4f}".format(total))
    return total
# Example call: prints the weighted mean of numList to four decimal places
weightedMean(numList)
def median(numList):
    """Print and return the median (middle value) of a list of numbers.

    For an even number of entries the two central values are averaged and
    printed with four decimal places. For an odd number the central value
    is printed unchanged.

    Args:
        numList: non-empty iterable of numbers; it is not modified.

    Returns:
        The median: a float for an even count, otherwise the middle element.

    Raises:
        ValueError: if numList is empty.
    """
    ordered = sorted(numList)
    if not ordered:
        raise ValueError("median() requires at least one value")
    # Index of the middle entry (odd count) or the lower of the two
    # middle entries (even count)
    middle = (len(ordered) - 1) // 2
    if len(ordered) % 2 == 0:
        result = (ordered[middle] + ordered[middle + 1]) / 2.0
        print("{0:.4f}".format(result))
    else:
        result = ordered[middle]
        print(result)
    return result
# Example call: prints the median of numList
median(numList)
def mode(numList):
    """Print and return the mode (most common value) of a list of numbers.

    The mode is printed first, followed by one "value: count" line for
    every distinct value, most frequent first.

    Args:
        numList: non-empty iterable of hashable values.

    Returns:
        The most common value. When several values tie, the one that comes
        first in the dictionary's iteration order is reported.

    Raises:
        ValueError: if numList is empty.
    """
    # Tally occurrences; dict.get avoids a separate membership test
    count = {}
    for num in numList:
        count[num] = count.get(num, 0) + 1
    if not count:
        raise ValueError("mode() requires at least one value")
    # The mode is the key with the largest count
    mostCommon = max(count, key=count.get)
    print(mostCommon)
    # List every value with its count, highest count first
    byFrequency = sorted(count.items(), key=lambda item: item[1], reverse=True)
    for value, occurrences in byFrequency:
        print("{0}: {1}".format(value, occurrences))
    return mostCommon
# Example call: prints the mode of numList, then a "value: count" line for
# each distinct value
mode(numList)
def standardDeviation(numList):
    """Return the population standard deviation of a list of numbers.

    This is the square root of the mean of the squared distances between
    each value and the arithmetic mean of the list.
    """
    # Centre of the data, computed by the mean() function defined above
    center = float(mean(numList))
    # Squared distance of every value from that centre
    squaredDistances = [float((value - center) ** 2) for value in numList]
    # The mean of the squared distances is the population variance
    populationVariance = float(mean(squaredDistances))
    return math.sqrt(populationVariance)
# Example call; standardDeviation() returns its result without printing it,
# so this line displays nothing when the file is run as a script
standardDeviation(numList)
def distributions(loadedDie):
    """Print and return the variance and standard deviation of a die.

    Unlike standardDeviation(), which measures a data set, this works on a
    probability distribution: loadedDie[i] is the probability of rolling
    face i + 1. The variance is computed as E[X^2] - E[X]^2.

    Args:
        loadedDie: sequence of probabilities, one per face, starting at
            face 1.

    Returns:
        A (variance, standardDev) tuple of floats. Both values are also
        printed with four decimal places.
    """
    expectedValue = 0.0
    expectedSquare = 0.0
    # Faces are numbered from 1, so start the enumeration there
    for face, probability in enumerate(loadedDie, 1):
        expectedValue += face * probability
        expectedSquare += (face ** 2) * probability
    variance = expectedSquare - expectedValue ** 2
    print("Variance: {0:.4f}".format(variance))
    standardDev = math.sqrt(variance)
    print("Standard Dev: {0:.4f}".format(standardDev))
    return variance, standardDev
# Simulate a loaded six-sided die where 5 and 6 are each twice as likely as
# any other face
# Written as 1.0/N so Python 2 performs float division; the alternative is
# "from __future__ import division"
# In Python 3 the / operator always performs true division
loadedDie = [1.0/8, 1.0/8, 1.0/8, 1.0/8, 1.0/4, 1.0/4]
# Fair die, for comparison:
#loadedDie = [1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6]
# Example call: prints the variance and standard deviation of the die
distributions(loadedDie)
def covariance(numList, numList2):
    """Return the population covariance of two equally long lists.

    Computed as the mean of the pairwise products minus the product of
    the two means, i.e. E[XY] - E[X] * E[Y].

    Args:
        numList: first sequence of numbers.
        numList2: second sequence of numbers, the same length as numList.

    Returns:
        The covariance as a float.

    Raises:
        ValueError: if the two lists differ in length.
        ZeroDivisionError: if the lists are empty.
    """
    # Values are paired by position, so the lengths must match; otherwise
    # the product sum and the means would cover different data
    if len(numList) != len(numList2):
        raise ValueError("covariance() requires lists of equal length")
    # Sum of the pairwise products
    productSum = 0.0
    for first, second in zip(numList, numList2):
        productSum += first * second
    # Mean of the products, then subtract mean1 * mean2
    meanOfProducts = productSum / len(numList)
    return meanOfProducts - float(mean(numList)) * float(mean(numList2))
# Example call; covariance() returns its result without printing it, so this
# line displays nothing when the file is run as a script
covariance(numList, numList2)
def correlation(numList, numList2):
    """Print and return the correlation coefficient of two lists.

    Uses the Pearson formula
        r = n^2 * cov(X, Y) / sqrt((n*sum(x^2) - sum(x)^2) * (n*sum(y^2) - sum(y)^2))
    where cov is the covariance() function defined above.

    Args:
        numList: first sequence of numbers.
        numList2: second sequence of numbers, the same length as numList.

    Returns:
        The coefficient as a float. The same value is printed with four
        decimal places.

    Raises:
        ValueError: if the two lists differ in length.
        ZeroDivisionError: if the lists are empty, or if either list is
            constant (the denominator is then zero).
    """
    if len(numList) != len(numList2):
        raise ValueError("correlation() requires lists of equal length")
    size = len(numList)
    # Running sums and sums of squares for each list
    sum1 = 0.0
    sum1Sq = 0.0
    for num in numList:
        sum1 += num
        sum1Sq += num ** 2
    sum2 = 0.0
    sum2Sq = 0.0
    for num2 in numList2:
        sum2 += num2
        sum2Sq += num2 ** 2
    # The outer parentheses let the expression span several lines
    coeffFinal = (size ** 2 * covariance(numList, numList2) /
                  math.sqrt(((size * sum1Sq) - (sum1 ** 2)) *
                            ((size * sum2Sq) - (sum2 ** 2))))
    print("{0:.4f}".format(coeffFinal))
    return coeffFinal
# Example call: prints the correlation coefficient of numList and numList2
# to four decimal places
correlation(numList, numList2)
![]()