IRLS Regression

Example A.9. IRLS Regression

 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
#                   AdvancedMiner Example Script
#            Copyright Algolytics sp. z o. o. 2004-2015
#
# Example description: IRLS Regression model building, testing,
#                      statistics calculation and application
#
# Input data: StatLib-Datasets Archive: http://stat.cmu.edu/datasets/
#             The data set came from:
#        Miller, A.J., Shaw, D.E., Veitch, L.G. & Smith, E.J. (1979).
#       `Analyzing the results of a cloud-seeding experiment in Tasmania',
#       Communications in Statistics - Theory & Methods, vol.A8(10), 1017-1047.
#
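# Data Description:
#  period is the number of the observation period; all rainfalls are in inches.
#  TE and TW are the rainfalls for the east and west target areas respectively,
#  while NC, SC and NWC are the corresponding rainfalls in the north, south and
#  north-west control areas respectively. TW is not included in the table below.
#  In the original data set S = seeded, U = unseeded; the table below uses a
#  1/0 flag in the seeded column instead (presumably 1 = seeded, 0 = unseeded).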
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Cloud-seeding data set: one row per observation period, 108 rows in total.
# Column order: period, seeded, season, NC, SC, NWC, TE.
#   period           - period number written as a float; the values are not
#                      contiguous (e.g. 16.0 is followed by 45.0)
#   seeded           - 0/1 flag
#   season           - quoted string: 'AUTUMN', 'WINTER', 'SPRING' or 'SUMMER'
#   NC, SC, NWC, TE  - numeric rainfall columns (see the header comment)
# TE is the attribute used as the regression target later in this script.
table 'cloud' :
     period seeded season NC SC NWC TE 
     1.0 1 'AUTUMN' 1.65 1.8 3.33 1.69 
     2.0 0 'AUTUMN' 1.09 0.79 1.59 0.74 
     3.0 1 'WINTER' 2.39 0.36 2.06 0.81 
     4.0 0 'WINTER' 2.96 1.27 4.05 1.44 
     5.0 1 'WINTER' 4.16 2.16 6.0 2.48 
     6.0 0 'WINTER' 2.76 0.87 4.17 0.84 
     7.0 0 'WINTER' 1.08 0.85 3.45 0.37 
     8.0 1 'WINTER' 0.26 0.47 0.9 0.37 
     9.0 0 'SPRING' 2.53 1.08 3.65 1.33 
     10.0 1 'SPRING' 2.76 3.1 5.06 3.38 
     11.0 1 'SPRING' 1.07 0.64 1.95 0.69 
     12.0 0 'SPRING' 1.42 1.08 1.22 1.42 
     13.0 1 'SPRING' 0.24 0.44 0.94 0.44 
     14.0 0 'SPRING' 0.7 0.67 0.94 0.76 
     15.0 1 'SUMMER' 0.97 1.66 2.21 1.13 
     16.0 0 'SUMMER' 1.06 1.13 1.46 0.88 
     45.0 1 'SUMMER' 0.13 0.27 0.35 0.17 
     46.0 0 'SUMMER' 0.1 0.3 0.34 0.25 
     47.0 0 'SUMMER' 0.38 0.58 0.67 0.78 
     48.0 1 'SUMMER' 0.45 0.43 0.44 0.4 
     49.0 1 'AUTUMN' 0.42 0.47 0.53 0.52 
     50.0 0 'AUTUMN' 2.24 4.02 2.52 2.73 
     51.0 0 'AUTUMN' 0.52 1.32 2.18 0.9 
     52.0 1 'AUTUMN' 0.94 1.59 1.73 1.62 
     53.0 0 'AUTUMN' 1.19 0.85 2.31 0.93 
     54.0 1 'AUTUMN' 0.76 0.71 1.28 0.63 
     55.0 1 'WINTER' 0.13 0.59 0.91 0.42 
     56.0 0 'WINTER' 1.5 0.24 1.15 0.64 
     57.0 0 'WINTER' 1.03 0.22 1.88 0.3 
     58.0 1 'WINTER' 1.87 0.58 2.97 0.88 
     59.0 0 'WINTER' 1.85 1.36 2.17 0.76 
     60.0 1 'WINTER' 2.04 0.71 2.22 1.25 
     61.0 0 'WINTER' 1.44 1.0 1.64 1.08 
     62.0 1 'WINTER' 1.46 1.48 0.4 1.11 
     63.0 1 'SPRING' 5.08 1.77 4.2 3.43 
     64.0 0 'SPRING' 0.66 0.73 0.91 0.54 
     65.0 1 'SPRING' 0.49 0.55 0.51 0.39 
     66.0 0 'SPRING' 3.27 2.68 3.6 2.53 
     67.0 0 'SPRING' 1.33 0.43 2.18 0.81 
     68.0 1 'SPRING' 0.25 0.46 0.89 0.39 
     69.0 1 'SUMMER' 0.69 0.49 0.69 0.86 
     70.0 0 'SUMMER' 2.12 0.95 1.82 2.16 
     95.0 0 'SPRING' 1.45 1.47 2.2 1.7 
     96.0 1 'SPRING' 2.13 1.13 2.33 1.22 
     97.0 1 'SPRING' 0.02 0.08 0.24 0.07 
     98.0 0 'SPRING' 0.36 0.87 0.57 0.49 
     99.0 0 'SPRING' 0.72 0.99 0.98 0.71 
     100.0 1 'SPRING' 1.02 1.89 2.47 1.67 
     101.0 0 'SUMMER' 0.18 1.42 0.71 0.73 
     102.0 1 'SUMMER' 1.83 1.82 3.11 1.79 
     103.0 0 'SUMMER' 0.08 0.4 0.57 0.19 
     104.0 1 'SUMMER' 0.0 0.04 0.04 0.0 
     105.0 1 'SUMMER' 0.83 0.38 0.7 0.44 
     106.0 0 'SUMMER' 0.01 0.44 0.66 0.31 
     107.0 1 'SUMMER' 2.65 0.85 1.48 0.96 
     108.0 0 'SUMMER' 1.27 1.39 1.2 1.04 
     109.0 1 'AUTUMN' 0.01 0.23 0.1 0.05 
     110.0 0 'AUTUMN' 0.35 0.75 0.2 0.04 
     111.0 1 'AUTUMN' 1.8 1.62 3.02 1.83 
     112.0 0 'AUTUMN' 4.44 1.05 3.59 2.24 
     113.0 1 'AUTUMN' 2.84 2.44 4.48 2.5 
     114.0 0 'AUTUMN' 2.05 1.3 4.04 1.1 
     115.0 1 'AUTUMN' 3.01 1.66 4.56 1.83 
     116.0 0 'AUTUMN' 2.58 1.21 3.95 1.41 
     117.0 0 'WINTER' 2.22 0.61 2.68 0.74 
     118.0 1 'WINTER' 0.07 2.26 2.08 1.09 
     119.0 1 'WINTER' 1.62 1.16 2.87 0.79 
     120.0 0 'WINTER' 4.34 3.29 6.4 4.06 
     121.0 0 'WINTER' 1.03 0.58 1.77 0.4 
     122.0 1 'WINTER' 1.5 0.41 2.56 0.76 
     123.0 1 'SPRING' 1.52 1.62 2.86 1.53 
     124.0 0 'SPRING' 0.37 1.25 1.74 0.56 
     125.0 0 'SPRING' 2.14 1.0 4.39 1.74 
     126.0 1 'SPRING' 2.36 1.53 3.03 1.59 
     127.0 0 'SPRING' 1.71 2.03 3.24 1.91 
     128.0 1 'SPRING' 2.12 2.77 4.44 2.09 
     129.0 0 'SUMMER' 1.38 2.11 3.01 1.59 
     130.0 1 'SUMMER' 0.21 1.41 0.8 0.66 
     131.0 0 'SUMMER' 0.48 0.59 0.68 0.68 
     132.0 1 'SUMMER' 0.01 0.65 0.48 0.46 
     133.0 1 'SUMMER' 0.15 0.13 0.42 0.22 
     134.0 0 'SUMMER' 1.32 0.57 1.54 1.11 
     135.0 1 'SUMMER' 2.26 1.04 1.27 1.76 
     136.0 0 'SUMMER' 5.95 3.97 5.37 5.12 
     171.0 0 'AUTUMN' 0.19 0.28 0.7 0.12 
     172.0 1 'AUTUMN' 0.31 0.23 0.83 0.37 
     173.0 1 'AUTUMN' 1.44 3.14 0.86 4.97 
     174.0 0 'AUTUMN' 0.3 0.72 1.38 0.57 
     175.0 1 'AUTUMN' 0.11 0.14 0.58 0.13 
     176.0 0 'AUTUMN' 3.66 1.84 5.36 2.47 
     177.0 0 'AUTUMN' 1.14 0.81 2.09 1.01 
     178.0 1 'AUTUMN' 1.3 0.34 2.45 0.55 
     179.0 1 'WINTER' 0.05 0.38 0.9 0.24 
     180.0 0 'WINTER' 1.84 1.73 2.33 2.36 
     181.0 1 'WINTER' 4.24 1.67 5.48 2.35 
     182.0 0 'WINTER' 1.99 1.9 3.67 2.23 
     183.0 0 'WINTER' 2.44 1.52 4.01 1.16 
     184.0 1 'WINTER' 2.21 2.36 3.25 1.63 
     185.0 1 'WINTER' 0.8 2.25 2.79 1.08 
     186.0 0 'WINTER' 9.42 3.6 7.84 6.0 
     187.0 1 'SPRING' 2.74 3.03 6.39 2.67 
     188.0 0 'SPRING' 0.0 0.19 0.06 0.36 
     189.0 1 'SPRING' 0.96 0.64 1.24 0.58 
     190.0 0 'SPRING' 1.38 1.86 2.91 1.36 
     191.0 1 'SPRING' 1.22 2.28 1.58 1.17 
     192.0 0 'SPRING' 2.46 2.47 2.39 2.37 
     193.0 1 'SPRING' 0.05 0.02 0.09 0.02 
     194.0 0 'SPRING' 0.61 0.87 1.35 0.92

# Describe the 'cloud' table: physical data first, logical data derived from it.
pd = PhysicalData('cloud')
ld = LogicalData(pd)


#------ Approximation function settings ------

# The model approximates TE; 'season' and 'seeded' are switched off as inputs.
fs = ApproximationFunctionSettings()
fs.logicalData = ld
fs.targetAttributeName = 'TE'
for excludedName in ('season', 'seeded'):
    fs.attributeUsageSet.getAttribute(excludedName).setUsage(UsageOption.inactive)

#------ IRLS Regression algorithm settings ------

# The settings object is intentionally not named `as`: that identifier is a
# reserved keyword in Python 2.6+ and would stop the script from parsing on
# interpreters that follow the newer grammar.
irlsSettings = IRLSSettings()
irlsSettings.preselection = TRUE
irlsSettings.intercept = TRUE

# Variable selection is configured separately and attached to the algorithm
# settings; the 'full' method is requested here.
selectionSettings = VariableSelectionSettings()
selectionSettings.variableSelectionMethod = VariableSelectionMethod.full

irlsSettings.variableSelectionSettings = selectionSettings
fs.algorithmSettings = irlsSettings

# Store the data descriptions and the function settings under the names that
# the build, test and apply tasks below refer to.
save('cloud_pd', pd)
save('cloud_ld', ld)
save('reg_settings', fs)

#------ model building ------

bt = MiningBuildTask('cloud_pd', 'reg_settings', 'reg_model')
save('cloud_build', bt)
execute('cloud_build')
print "Build task for IRLS regression example was successfully executed"

#------ model testing ------

tt = ApproximationTestTask('cloud_pd', 'reg_model', 'cloud_out')
tt.testDataTargetAttributeName = 'TE'
save('cloud_test', tt)
execute('cloud_test')
print "Test task for IRLS regression example was successfully executed"


#----- model application ------

pdout = PhysicalData('cloud_apply')
save('cloud_pd_apply', pdout)
at = MiningApplyTask()
at.modelName = 'reg_model'
at.sourceDataName = 'cloud_pd'
at.targetDataName = 'cloud_pd_apply'
at.replaceExistingData = TRUE

directMapping = java.util.ArrayList()
asi = ApplySourceItem()
asi.sourceName = 'TE'
asi.destinationName = 'actual_target'
directMapping.add(asi)
at.setDirectMapping(directMapping)

ao = ApproximationApplyOutput()
aai = ApproximationOutputItem()
aai.setDestinationName('predicted_target')
aai.setOutputType(ApproximationOutputType.predictedValue)
ao.item.add(aai)

at.applyOutput = ao

save('cloud_apply', at)
execute('cloud_apply')

print "Apply task for IRLS regression example was successfully executed"
print 
# print model fit


statModel = load('reg_model').getModelStatistics()
statNames = statModel.getModelStatNames()
print "Model statistics \t value"
for row in range(len(statNames)):
    print statNames[row], " \t ", statModel.getModelStatValue(row)

print    
statModel = load('reg_model')
varStats =statModel.getModelStatistics().getVariableStatistics()
print "Variable \t Coeff \t VIF"
for v in varStats.getNames():
    print v, "\t", varStats.getVarStatValue(v,"Coeff"), "\t", varStats.getVarStatValue(v,"VIF")


Output

Build task for IRLS regression example was successfully executed
Test task for IRLS regression example was successfully executed
Apply task for IRLS regression example was successfully executed

Model statistics 	 value
dfR  	  4.0
SSR  	  92.55068427595378
MSR  	  23.137671068988446
dfE  	  103.0
SSE  	  6.71296171838018
MSE  	  0.06517438561534156
dfT  	  107.0
SST  	  99.26364599433397
F-test  	  355.011725090079
Pr>F  	  0.0
s  	  0.255292744932835
Rsq  	  0.9323724043063725
ADJRsq  	  0.9297460899105034

Variable 	 Coeff 	 VIF
Intercept 	 -0.066814006025922 	 NaN
period 	 4.707606205758679E-4 	 1.1405808437704104
NC 	 0.472707106434072 	 4.1130343485278
SC 	 0.637350132259273 	 2.777231115978932
NWC 	 -0.10911174793879069 	 5.334078732767622