Python: Statistics Calculators

Some statistics calculators written for Python 2.x (the str.format() method requires Python >= 2.6). Of course you could just use the numpy or statistics libraries, but where’s the fun in that?

All solutions were verified where possible by Wolfram Alpha or else by a second reference (its name is missing from this copy of the text).

import math

# Sample data sets passed to the example covariance() and correlation()
# calls further down; both hold ten values
numList = [10, 12, 12, 34, 5, 25, 26, 8, 45, 55]
numList2 = [12, 45, 23, 9, 65, 67, 45, 34, 49, 22]

def mean(numList):
    """ Return the arithmetic mean of a list of numbers as a float

    numList must support len(); an empty list raises ZeroDivisionError.
    """

    # Start from a float so the division below is never integer division
    runningTotal = 0.0
    for value in numList:
        runningTotal = runningTotal + value

    howMany = float(len(numList))
    return runningTotal / howMany


def weightedMean(numList):
    """ Calculates the position-weighted mean of a list of numbers

    The value at index i is given the weight i + 1, so later entries count
    for more. The result is sum(weight * value) / sum(weights).

    Prints the result to four decimal places and also returns it as a float.
    Raises ValueError if numList is empty.
    """

    count = len(numList)
    if count == 0:
        raise ValueError("weightedMean() requires at least one value")

    # The weights are 1..count, so their total has a closed form
    totalWeight = count * (count + 1) // 2

    # Add up every value multiplied by its 1-based position
    weightedSum = 0.0
    for weight, value in enumerate(numList, 1):
        weightedSum += weight * value

    total = weightedSum / totalWeight
    # Parenthesised single-argument print works on Python 2.6+ and Python 3
    print("{0:.4f}".format(total))
    return total

def median(numList):
    """ Calculate the median (the middle value) of a list of numbers

    For an even number of entries the two middle values are averaged and
    printed to four decimal places; for an odd number the single middle
    value is printed as-is. The printed value is also returned.

    Raises ValueError if numList is empty.
    """

    tempList = sorted(numList)
    if not tempList:
        raise ValueError("median() requires at least one value")

    # Lower of the two middle positions (the only middle one for odd sizes)
    index = (len(tempList) - 1) // 2
    # If the set has an even number of entries, combine the middle two
    if len(tempList) % 2 == 0:
        result = (tempList[index] + tempList[index + 1]) / 2.0
        print("{0:.4f}".format(result))
    # Otherwise print the middle value
    else:
        result = tempList[index]
        print(result)
    return result


def mode(numList):
    """ Calculates the mode (the most common value) of a list of numbers

    Prints the mode, then every distinct value with its count, ordered from
    most to least frequent. The mode is also returned. When several values
    share the highest count, the one max() meets first while iterating the
    count dictionary is reported.

    Raises ValueError if numList is empty.
    """
    # Set up a dictionary (we prefer unique keys) to hold our counts
    count = {}
    for num in numList:
        # First sighting starts from 0, every sighting adds exactly one
        count[num] = count.get(num, 0) + 1

    if not count:
        raise ValueError("mode() requires at least one value")

    # Simply print the mode (key with largest value)
    result = max(count, key=count.get)
    print(result)
    # Sort the (value, count) pairs by count and print them
    sortedCount = sorted(count.items(), key=lambda pair: pair[1], reverse=True)
    for k, v in sortedCount:
        print("{0}: {1}".format(k, v))
    return result

def standardDeviation(numList):
    """ Return the (population) standard deviation of a list of numbers

    This is the square root of the mean squared distance of each value
    from the mean of the list.
    """

    # Reuse mean() defined above to find the centre of the data
    centre = float(mean(numList))

    # Squared distance of every value from that centre
    squaredDistances = [float((value - centre) ** 2) for value in numList]

    # The mean of the squares, passed through a square root, is the result
    return math.sqrt(float(mean(squaredDistances)))

def distributions(loadedDie):
    """ Variance and standard deviation of a probability distribution

    loadedDie[i] is used as the probability of outcome i + 1 (faces are
    numbered from 1). Unlike standardDeviation() above, which measures a
    data set, this measures a distribution.

    Prints both figures to four decimal places and also returns them as
    the tuple (variance, standardDev).
    """
    expectedValue = 0.0
    expectedSquare = 0.0
    # One pass gathers both E[X] and E[X^2]
    for face, probability in enumerate(loadedDie, 1):
        expectedValue += face * probability
        expectedSquare += (face ** 2) * probability
    # Var(X) = E[X^2] - E[X]^2
    variance = expectedSquare - expectedValue ** 2
    # Parenthesised single-argument print works on Python 2.6+ and Python 3
    print("Variance: {0:.4f}".format(variance))
    standardDev = math.sqrt(variance)
    print("Standard Dev: {0:.4f}".format(standardDev))
    return variance, standardDev

# Simulate a loaded six-sided die where 5 and 6 are twice as likely
# Using 1.0 to force float division on Python 2 (the alternative is
# "from __future__ import division")
# On Python 3 the / operator always performs true division
loadedDie = [1.0/8, 1.0/8, 1.0/8, 1.0/8, 1.0/4, 1.0/4]
# Normal (fair) die, for comparison
#loadedDie = [1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6]

def covariance(numList, numList2):
    """ Return the (population) covariance of two data sets

    Computed as the mean of the position-wise products minus the product
    of the two means. numList2 is indexed by the positions of numList, so
    it must be at least as long as numList.
    """

    # Add up the products of the values that share a position
    productSum = 0.0
    for position, first in enumerate(numList):
        productSum += first * numList2[position]

    # Mean of the products, less mean1 * mean2
    meanOfProducts = productSum / len(numList)
    productOfMeans = float(mean(numList)) * float(mean(numList2))
    return meanOfProducts - productOfMeans
# Example call; the returned covariance is neither stored nor printed here
covariance(numList, numList2)

def correlation(numList, numList2):
    """ Compute the (Pearson) correlation coefficient of two data sets

    Prints the coefficient to four decimal places and also returns it.

    Raises ValueError if the two lists differ in length, and
    ZeroDivisionError if the lists are empty or the denominator is zero
    (for example when every value in one list is the same).
    """
    count = len(numList)
    if count != len(numList2):
        raise ValueError("correlation() requires two lists of equal length")

    sum1 = 0.0
    sum2 = 0.0
    sum1Sq = 0.0
    sum2Sq = 0.0
    for num in numList:
        sum1 += num
        sum1Sq += num**2
    for num2 in numList2:
        sum2 += num2
        sum2Sq += num2**2

    # count^2 * covariance equals count * sum(x*y) - sum(x) * sum(y)
    numerator = count**2 * covariance(numList, numList2)
    # The parentheses let the expression span lines without a SyntaxError
    denominator = math.sqrt(((count * sum1Sq) - (sum1**2)) *
                            ((count * sum2Sq) - (sum2**2)))
    coeffFinal = numerator / denominator
    print("{0:.4f}".format(coeffFinal))
    return coeffFinal

# Example call: prints the correlation coefficient of the two sample lists
correlation(numList, numList2)


Leave a Reply

Your email address will not be published. Required fields are marked *