Welcome, guest | Sign In | My Account | Store | Cart
```python
"""
@author    Thomas Lehmann
@file      Main.py
@brief     correlation and regression analysis

Referring to document at (german):
http://www.faes.de/Basis/Basis-Statistik/Basis-Statistik-Korrelation-Re/basis-statistik-korrelation-re.html
"""
import sys
import math

EPSILON = 0.0000001  # magnitudes below this are treated as zero


class SimpleLinearRegression:
    """ tool class as help for calculating a linear function

        Fits the line y = a + b*x to a list of (x, y) pairs by least
        squares and computes the coefficient of correlation r. """

    def __init__(self, data):
        """ initializes members with defaults

            @param data sequence of (x, y) pairs; it must support len()
                        and repeated iteration """
        self.data = data   # list of (x,y) pairs
        self.a    = 0      # "a" of y = a + b*x
        self.b    = 0      # "b" of y = a + b*x
        self.r    = 0      # coefficient of correlation

    def run(self):
        """ calculates coefficient of correlation and
            the parameters for the linear function

            @return True when a, b and r have been calculated. False when
                    there are fewer than two points, when x or y does not
                    vary (the correlation is undefined), or when
                    abs(r) is below EPSILON (no linear relationship);
                    in the last case r is still stored, a and b stay 0. """
        # Start from the defaults so that a failing run never leaves the
        # parameters of an earlier, successful run behind.
        self.a, self.b, self.r = 0, 0, 0

        n = float(len(self.data))
        if n < 2:
            # Empty data would divide by zero below; a single point does
            # not determine a line.
            return False

        mean_x = sum(x for x, _ in self.data) / n
        mean_y = sum(y for _, y in self.data) / n

        # Sums of squared/cross deviations from the means. They are
        # algebraically equal to sumXX - sumX**2/n (and so on), but every
        # squared term is >= 0, so rounding can never hand math.sqrt a
        # negative argument.
        sxx, syy, sxy = 0.0, 0.0, 0.0
        for x, y in self.data:
            dx = x - mean_x
            dy = y - mean_y
            sxx += dx * dx
            syy += dy * dy
            sxy += dx * dy

        denominator = math.sqrt(sxx * syy)
        if denominator < EPSILON:
            return False

        # coefficient of correlation
        self.r = sxy / denominator

        # is there no relationship between 'x' and 'y'?
        if abs(self.r) < EPSILON:
            return False

        # calculating 'a' and 'b' of y = a + b*x
        # (sxx is > 0 here, otherwise the denominator check had failed)
        self.b = sxy / sxx
        self.a = mean_y - self.b * mean_x
        return True

    def function(self, x):
        """ linear function (be aware of current
            coefficient of correlation)

            @param x value to evaluate the fitted line at
            @return a + b*x using the currently stored parameters """
        return self.a + self.b * x

    def __repr__(self):
        """ current linear function for print """
        return "y = f(x) = %(a)f + %(b)f*x" % {"a": self.a, "b": self.b}

def example():
    """ provides an example with error rates (one per session)
        @note linear function verified in open office calc

        Prints the fitted parameters, a forecast for x = 5 and the
        relative change of the fitted line between x = 1 and x = 4.
        Prints an error line and returns early when the regression
        cannot be calculated. """
    print("Simple linear regression v0.2 by Thomas Lehmann 2012")
    print("...Python %s" % sys.version.replace("\n", ""))
    # (x, y) pairs with x = 1..4; the forecast and the trend below rely
    # on exactly these x values. Presumably x is the session number and
    # y the error rate of that session.
    data   = [(1.0, 18.0), (2, 15.0), (3, 19.0), (4, 10.0)]

    print("...data is %s" % data)

    linRegr = SimpleLinearRegression(data)
    if not linRegr.run():
        print("...error: failed to calculate parameters")
        return

    print("...the coefficient of correlation r = %f (r**2 is %f)" % (linRegr.r, linRegr.r**2))
    print("...parameter a of y = f(x) = a + b*x is %f" % linRegr.a)
    print("...parameter b of y = f(x) = a + b*x is %f" % linRegr.b)
    print("...linear function is then %s" % linRegr)
    print("...forecast of next value: f(5) = %f" % linRegr.function(5))

    # relative change (in percent) of the fitted line from the first
    # to the last x value of the data
    firstY = linRegr.function(1)
    lastY  = linRegr.function(4)
    change = (lastY - firstY) / firstY * 100.0

    # keep in mind: reducing of error rate (inverse valuation)!
    if change < 0:
        print("...the trend is about %.1f%% improvement" % -change)
    else:
        print("...the trend is about %.1f%% to the worse" % change)

# run the example only when this file is executed as a script
if __name__ == "__main__":
    example()
```

#### Diff to Previous Revision

```diff
--- revision 1 2012-05-12 10:36:51
+++ revision 2 2012-05-12 13:35:14
@@ -8,6 +8,8 @@
"""
import sys
import math
+
+EPSILON = 0.0000001

class SimpleLinearRegression:
""" tool class as help for calculating a linear function """
@@ -31,15 +33,16 @@
sumXX += x*x
sumYY += y*y

-        try:
-            # coefficient of correlation
-            self.r  = (sumXY - 1/n * sumX * sumY)
-            self.r /= math.sqrt((sumXX - 1/n * sumX**2)*(sumYY - 1/n * sumY**2))
-        except ZeroDivisionError as error:
+        denominator = math.sqrt((sumXX - 1/n * sumX**2)*(sumYY - 1/n * sumY**2))
+        if denominator < EPSILON:
return False

+        # coefficient of correlation
+        self.r  = (sumXY - 1/n * sumX * sumY)
+        self.r /= denominator
+
# is there no relationship between 'x' and 'y'?
-        if abs(self.r) < 0.0000001:
+        if abs(self.r) < EPSILON:
return False

# calculating 'a' and 'b' of y = a + b*x
@@ -62,9 +65,10 @@
def example():
""" provides an example with error rates (one per session)
@note linear function verified in open office calc """
-    print("Simple linear regression v0.1 by Thomas Lehmann 2012")
+    print("Simple linear regression v0.2 by Thomas Lehmann 2012")
print("...Python %s" % sys.version.replace("\n", ""))
data   = [(1.0, 18.0), (2, 15.0), (3, 19.0), (4, 10.0)]
+    data   = [(1.0, 18.0), (2, 18.0)]

print("...data is %s" % data)

```