# Example of how to perform redundancy analysis (RDA) # in R, but from Python via RPy. # # Created 7/23/08 # Ryan Dale from rpy import * from StringIO import StringIO import numpy as npy # Data are from Legendre & Legendre's Numerical Ecology, Table 11.3. # These are constructed data used to illustrate RDA. sp1 thru sp10 are # fish at each of 10 sitesl; each site has a depth (1-10) and a substrate type # (coral, sand, or other). # Why use StringIO? It mimics a file-like object, so you can easily plug in your # own file with your own data, like so: # # data = open('mydata.txt') # # (as long as it's space-delimited) data = StringIO(""" site sp1 sp2 sp3 sp4 sp5 sp6 depth coral sand other 1 1 0 0 0 0 0 1 0 1 0 2 0 0 0 0 0 0 2 0 1 0 3 0 1 0 0 0 0 3 0 1 0 4 11 4 0 0 8 1 4 0 0 1 5 11 5 17 7 0 0 5 1 0 0 6 9 6 0 0 6 2 6 0 0 1 7 9 7 13 10 0 0 7 1 0 0 8 7 8 0 0 4 3 8 0 0 1 9 7 9 10 13 0 0 9 1 0 0 10 5 10 0 0 2 4 10 0 0 1 """) #------------------------ # Load and condition data #------------------------ # (this section is independent of R) # Since both Y and X matrices are in the same table, specify at which column # the X matrix begins. startX = 7 # Remove any blank lines at the beginning of the data. line = data.readline() while len(line.rstrip().split()) == 0: line = data.readline() # line should now be the header row, so split to get the variable names. variable_names = line.rstrip().split() # Keep X and Y variable names separate. Yvars = variable_names[1:startX] Xvars = variable_names[startX:] # Split the remaining lines, which will contain data. Then # ditch any trailing blank lines (length is zero after being split) values = [i.rstrip().split() for i in data] values = [i for i in values if len(i)>0] # Error checking: make sure lines are all the same length lens = [len(i) for i in values] unique_lens = set(lens) assert len(unique_lens) == 1 # First column contains object names. Don't want these in the # Y matrix. object_names = [i[0] for i in values] # Create Y and X matrices. Start at index 1 so that you skip # the object names. Y = [i[1:startX] for i in values] Y = npy.array(Y, dtype=float) X = [i[startX:] for i in values] X = npy.array(X, dtype=float) #-------------------------- # Send data to R #-------------------------- # Now, send them to R r.assign('Y', Y) r.assign('X', X) r.assign('xvars', Xvars) r.assign('yvars', Yvars) r.assign('objects', object_names) #-------------------------- # Some setup in R #-------------------------- # In R, set up dataframes out of the stuff that was just passed in. r(""" Y = data.frame(Y, row.names=objects) X = data.frame(X, row.names=objects) names(Y) = yvars names(X) = xvars """) # We'll be using the rda() method in the vegan package, so load it up r('require(vegan)') # This auto-generates a model description based on Xvars. # Creates a string, something like "Y~depth+sand+coral+other" . . . # but what it actually contains depends on if you used your own data or not. model_formula = 'Y~' + '+'.join(Xvars) #------------------------ # Perform the RDA #------------------------ r('xrda = rda(%s, X)' % model_formula) # Plot it! r('plot(xrda, scaling=2)') #----------------------------------------------- # Calculate variance explained by the ordination #----------------------------------------------- # Get the RDA object back from R. x = r('xrda') # It took a little reverse engineering to figure out where to find # the necessary values in the dictionary. # Percent variance explained by the constrained axes constrained = x['CCA']['tot.chi'] / x['tot.chi'] # Percent variance explained by the unconstrained axes. unconstrained = x['CA']['tot.chi'] / x['tot.chi'] # Percent variance explained by RDA1 and RDA2 axes. RDA1 = x['CCA']['eig']['RDA1'] / x['tot.chi'] RDA2 = x['CCA']['eig']['RDA2'] / x['tot.chi'] # Obtain P value (performs a permutation test to see if sum of eigenvalues # is higher than 95% of all random permutations of data) a = r('anova(xrda)') P = a['Pr(>F)'][0] print 'Const: %.3f' % constrained print 'Unconst: %.3f' % unconstrained print 'RDA1: %.3f' % RDA1 print 'RDA2: %.3f' % RDA2 print 'P = %.3f' % P # Save plot: r('dev.copy(png, file="RPy-demo.png")') r('dev.off()')