"""
Author: Mathieu Perreault
Lecture 19: Correlation and other statistics

Reads tab-separated (x, y) points from the file named by the first
command-line argument, computes their Pearson correlation, fits a straight
line through them and shows a figure with the points, the fitted line and
two dashed guide lines.

Usage:
    python SCRIPT DATA.tsv
"""
import csv
import math
import sys

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Only plot_fit() needs matplotlib; the numeric helpers below stay
    # importable without it. plot_fit() re-raises a clear ImportError.
    plt = None


def read_points(path):
    """Read tab-separated points from *path*.

    This assumes that points are in order and formatted as x<TAB>y, one
    point per row. Rows that csv.reader returns empty (blank lines) are
    skipped.

    Returns:
        A tuple ``(x, y)`` of two equally long lists of floats.

    Raises:
        IndexError: a non-blank row has fewer than two columns.
        ValueError: a value in the first two columns is not a number.
    """
    x = []
    y = []
    # The context manager closes the file even if a row fails to parse.
    with open(path) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            if not row:
                continue
            x.append(float(row[0]))
            y.append(float(row[1]))
    return x, y


def pearson_correlation(x, y):
    """Return the Pearson correlation coefficient of *x* and *y*.

    Computed as sum((x - mean_x) * (y - mean_y)) divided by
    sqrt(sum((x - mean_x)**2) * sum((y - mean_y)**2)).
    ``np.corrcoef(x, y)[0][1]`` could be used instead.

    Raises:
        ValueError: the sequences differ in length, are empty, or the
            denominator is zero (every x or every y equals its mean), in
            which case the coefficient is undefined.
    """
    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length (got %d and %d)"
            % (len(x), len(y)))
    if not x:
        raise ValueError("at least one point is required")

    x_mean = sum(x) / len(x)
    y_mean = sum(y) / len(y)

    cov_sum = 0.
    x_sum_sq = 0.
    y_sum_sq = 0.
    for xv, yv in zip(x, y):
        dx = xv - x_mean
        dy = yv - y_mean
        cov_sum += dx * dy
        x_sum_sq += dx ** 2
        y_sum_sq += dy ** 2

    denom = math.sqrt(x_sum_sq * y_sum_sq)
    if denom == 0.:
        # Would otherwise surface as a bare ZeroDivisionError.
        raise ValueError(
            "correlation is undefined: x or y has zero variance")
    return cov_sum / denom


def fit_line(x, y):
    """Return the degree-1 least-squares fit of *y* on *x*, evaluated at *x*."""
    params = np.polyfit(x, y, 1)
    return np.polyval(params, x)


def plot_fit(x, y, cor, ref_y=15., ref_x=30000):
    """Show the points, their fitted line and two dashed guide lines.

    Args:
        x, y: point coordinates.
        cor: correlation coefficient printed in the legend, next to
            ``np.std(y)``.
        ref_y: position of the dashed black horizontal line.
        ref_x: position of the dashed black vertical line.

    Raises:
        ImportError: matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required to draw the figure")

    yp = fit_line(x, y)
    plt.figure()
    plt.suptitle('Nice title!', fontsize=12)
    plt.plot(x, y, 'ro')
    plt.plot(x, yp, 'b')
    # A single label, so the legend entry is attached to the first plotted
    # artist (the red points).
    plt.legend(["Corr: %.3f, Std.: %.3f" % (cor, np.std(y))])
    plt.axhline(y=ref_y, color='k', linestyle='--')
    plt.axvline(x=ref_x, color='k', linestyle='--')
    plt.show()


def main(argv=None):
    """Run the script; *argv* defaults to ``sys.argv[1:]``.

    Only the first argument (the data file path) is used; extra arguments
    are ignored.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: python %s DATA.tsv" % sys.argv[0])

    x, y = read_points(args[0])
    try:
        cor = pearson_correlation(x, y)
    except ValueError as err:
        sys.exit("error: %s" % err)
    plot_fit(x, y, cor)


if __name__ == "__main__":
    main()