# Welcome, guest | Sign In | My Account | Store | Cart
"""
    @author    Thomas Lehmann
    @file      Main.py
    @brief     correlation and regression analysis

    Refers to the following document (in German):
    http://www.faes.de/Basis/Basis-Statistik/Basis-Statistik-Korrelation-Re/basis-statistik-korrelation-re.html
"""
import sys
import math

EPSILON = 0.0000001  # magnitudes below this threshold are treated as zero

class SimpleLinearRegression:
    """ tool class fitting the straight line y = a + b*x to (x, y) pairs

        Usage: create an instance with the data, call run() and - when it
        returned True - read 'a', 'b' and 'r' or evaluate function(x). """
    def __init__(self, data):
        """ initializes members with defaults

            @param data sized, re-iterable collection of (x, y) number pairs """
        self.data = data   # list of (x,y) pairs
        self.a    = 0      # "a" of y = a + b*x
        self.b    = 0      # "b" of y = a + b*x
        self.r    = 0      # coefficient of correlation

    def run(self):
        """ calculates coefficient of correlation and
            the parameters for the linear function

            @return True when 'a', 'b' and 'r' have been calculated.
                    False when there is no data, when the 'x' or the 'y'
                    values show (almost) no variation, or when the
                    coefficient of correlation is (almost) zero; 'a' and
                    'b' are 0 in that case. """
        # forget the results of an earlier run so that a failure never
        # leaves stale parameters behind
        self.a = self.b = self.r = 0

        count = len(self.data)
        if count == 0:
            # nothing to fit (would otherwise end in a division by zero)
            return False
        n = float(count)

        sumX, sumY = 0.0, 0.0
        for x, y in self.data:
            sumX += x
            sumY += y
        meanX = sumX / n
        meanY = sumY / n

        # Centered sums: sums of squared deviations can never get negative
        # by rounding (so math.sqrt is safe) and they avoid the cancellation
        # of the "sum of squares minus square of sum" form.
        devXX, devYY, devXY = 0.0, 0.0, 0.0
        for x, y in self.data:
            dx = x - meanX
            dy = y - meanY
            devXX += dx*dx
            devYY += dy*dy
            devXY += dx*dy

        denominator = math.sqrt(devXX * devYY)
        if denominator < EPSILON:
            return False

        # coefficient of correlation
        self.r = devXY / denominator

        # is there no relationship between 'x' and 'y'?
        if abs(self.r) < EPSILON:
            return False

        # calculating 'a' and 'b' of y = a + b*x
        # (devXX is greater than zero here, otherwise the denominator
        #  check above would have failed)
        self.b = devXY / devXX
        self.a = meanY - self.b * meanX
        return True

    def function(self, x):
        """ linear function (be aware of the current
            coefficient of correlation)

            @param x position at which the line is evaluated
            @return a + b*x using the current parameters """
        return self.a + self.b * x

    def __repr__(self):
        """ current linear function for print """
        return "y = f(x) = %(a)f + %(b)f*x" % {"a": self.a, "b": self.b}

def example():
    """ provides an example with error rates (one per session)

        Prints the Python version, the data, the coefficient of
        correlation, the parameters of the fitted line, a forecast for
        x = 5 and the relative change of the fitted line between x = 1
        and x = 4. Prints an error message and returns early when the
        parameters cannot be calculated.

        @note linear function verified in open office calc """
    print("Simple linear regression v0.2 by Thomas Lehmann 2012")
    print("...Python %s" % sys.version.replace("\n", ""))
    # The data must vary in 'y': a constant series such as
    # [(1.0, 18.0), (2, 18.0)] makes run() fail and only shows the
    # error message below.
    data = [(1.0, 18.0), (2, 15.0), (3, 19.0), (4, 10.0)]

    print("...data is %s" % data)

    linRegr = SimpleLinearRegression(data)
    if not linRegr.run():
        print("...error: failed to calculate parameters")
        return

    print("...the coefficient of correlation r = %f (r**2 is %f)" % (linRegr.r, linRegr.r**2))
    print("...parameter a of y = f(x) = a + b*x is %f" % linRegr.a)
    print("...parameter b of y = f(x) = a + b*x is %f" % linRegr.b)
    print("...linear function is then %s" % linRegr)
    print("...forecast of next value: f(5) = %f" % linRegr.function(5))

    # relative change (in percent) of the fitted line from the first
    # (x = 1) to the last (x = 4) data position
    firstY = linRegr.function(1)
    lastY  = linRegr.function(4)
    change = (lastY - firstY) / firstY * 100.0

    # keep in mind: reducing of error rate (inverse valuation)!
    if change < 0:
        print("...the trend is about %.1f%% improvement" % -change)
    else:
        print("...the trend is about %.1f%% to the worse" % change)

# run the example only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    example()

Diff to Previous Revision

--- revision 1 2012-05-12 10:36:51
+++ revision 2 2012-05-12 13:35:14
@@ -8,6 +8,8 @@
 """
 import sys
 import math
+
+EPSILON = 0.0000001
 
 class SimpleLinearRegression:
     """ tool class as help for calculating a linear function """
@@ -31,15 +33,16 @@
             sumXX += x*x
             sumYY += y*y
 
-        try:
-            # coefficient of correlation
-            self.r  = (sumXY - 1/n * sumX * sumY)
-            self.r /= math.sqrt((sumXX - 1/n * sumX**2)*(sumYY - 1/n * sumY**2))
-        except ZeroDivisionError as error:
+        denominator = math.sqrt((sumXX - 1/n * sumX**2)*(sumYY - 1/n * sumY**2))
+        if denominator < EPSILON:
             return False
 
+        # coefficient of correlation
+        self.r  = (sumXY - 1/n * sumX * sumY)
+        self.r /= denominator
+
         # is there no relationship between 'x' and 'y'?
-        if abs(self.r) < 0.0000001:
+        if abs(self.r) < EPSILON:
             return False
 
         # calculating 'a' and 'b' of y = a + b*x
@@ -62,9 +65,10 @@
 def example():
     """ provides an example with error rates (one per session)
         @note linear function verified in open office calc """
-    print("Simple linear regression v0.1 by Thomas Lehmann 2012")
+    print("Simple linear regression v0.2 by Thomas Lehmann 2012")
     print("...Python %s" % sys.version.replace("\n", ""))
     data   = [(1.0, 18.0), (2, 15.0), (3, 19.0), (4, 10.0)]
+    data   = [(1.0, 18.0), (2, 18.0)]
 
     print("...data is %s" % data)
 

History