Example A.9. IRLS Regression
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#   AdvancedMiner Example Script
#   Copyright Algolytics sp. z o. o. 2004-2015
#
#   Example description: IRLS Regression model building, testing,
#                        statistics calculation and application
#
#   Input data: StatLib-Datasets Archive: http://stat.cmu.edu/datasets/
#   The data set came from:
#       Miller, A.J., Shaw, D.E., Veitch, L.G. & Smith, E.J. (1979).
#       `Analyzing the results of a cloud-seeding experiment in Tasmania',
#       Communications in Statistics - Theory & Methods, vol.A8(10), 1017-1047.
#
#   Data Description:
#       period denotes periodical rainfalls in inches. TE and TW are the
#       rainfalls for the east and west target areas respectively, while
#       NC, SC and NWC are the corresponding rainfalls in the north, south
#       and north-west control areas respectively.
#       S = seeded, U = unseeded.
#
#   NOTE(review): the table below carries no TW column, and 'seeded' is
#   coded 1/0 rather than S/U (presumably 1 = seeded - confirm against
#   the StatLib file).
#
#   Script outline:
#       1. define the 'cloud' table inline,
#       2. configure an approximation function with TE as the target and
#          the 'season' and 'seeded' attributes switched off,
#       3. build, test and apply an IRLS regression model,
#       4. print the model-level and per-variable statistics.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# One row per line; columns follow the header row.
table 'cloud' :
    period seeded season NC SC NWC TE
    1.0 1 'AUTUMN' 1.65 1.8 3.33 1.69
    2.0 0 'AUTUMN' 1.09 0.79 1.59 0.74
    3.0 1 'WINTER' 2.39 0.36 2.06 0.81
    4.0 0 'WINTER' 2.96 1.27 4.05 1.44
    5.0 1 'WINTER' 4.16 2.16 6.0 2.48
    6.0 0 'WINTER' 2.76 0.87 4.17 0.84
    7.0 0 'WINTER' 1.08 0.85 3.45 0.37
    8.0 1 'WINTER' 0.26 0.47 0.9 0.37
    9.0 0 'SPRING' 2.53 1.08 3.65 1.33
    10.0 1 'SPRING' 2.76 3.1 5.06 3.38
    11.0 1 'SPRING' 1.07 0.64 1.95 0.69
    12.0 0 'SPRING' 1.42 1.08 1.22 1.42
    13.0 1 'SPRING' 0.24 0.44 0.94 0.44
    14.0 0 'SPRING' 0.7 0.67 0.94 0.76
    15.0 1 'SUMMER' 0.97 1.66 2.21 1.13
    16.0 0 'SUMMER' 1.06 1.13 1.46 0.88
    45.0 1 'SUMMER' 0.13 0.27 0.35 0.17
    46.0 0 'SUMMER' 0.1 0.3 0.34 0.25
    47.0 0 'SUMMER' 0.38 0.58 0.67 0.78
    48.0 1 'SUMMER' 0.45 0.43 0.44 0.4
    49.0 1 'AUTUMN' 0.42 0.47 0.53 0.52
    50.0 0 'AUTUMN' 2.24 4.02 2.52 2.73
    51.0 0 'AUTUMN' 0.52 1.32 2.18 0.9
    52.0 1 'AUTUMN' 0.94 1.59 1.73 1.62
    53.0 0 'AUTUMN' 1.19 0.85 2.31 0.93
    54.0 1 'AUTUMN' 0.76 0.71 1.28 0.63
    55.0 1 'WINTER' 0.13 0.59 0.91 0.42
    56.0 0 'WINTER' 1.5 0.24 1.15 0.64
    57.0 0 'WINTER' 1.03 0.22 1.88 0.3
    58.0 1 'WINTER' 1.87 0.58 2.97 0.88
    59.0 0 'WINTER' 1.85 1.36 2.17 0.76
    60.0 1 'WINTER' 2.04 0.71 2.22 1.25
    61.0 0 'WINTER' 1.44 1.0 1.64 1.08
    62.0 1 'WINTER' 1.46 1.48 0.4 1.11
    63.0 1 'SPRING' 5.08 1.77 4.2 3.43
    64.0 0 'SPRING' 0.66 0.73 0.91 0.54
    65.0 1 'SPRING' 0.49 0.55 0.51 0.39
    66.0 0 'SPRING' 3.27 2.68 3.6 2.53
    67.0 0 'SPRING' 1.33 0.43 2.18 0.81
    68.0 1 'SPRING' 0.25 0.46 0.89 0.39
    69.0 1 'SUMMER' 0.69 0.49 0.69 0.86
    70.0 0 'SUMMER' 2.12 0.95 1.82 2.16
    95.0 0 'SPRING' 1.45 1.47 2.2 1.7
    96.0 1 'SPRING' 2.13 1.13 2.33 1.22
    97.0 1 'SPRING' 0.02 0.08 0.24 0.07
    98.0 0 'SPRING' 0.36 0.87 0.57 0.49
    99.0 0 'SPRING' 0.72 0.99 0.98 0.71
    100.0 1 'SPRING' 1.02 1.89 2.47 1.67
    101.0 0 'SUMMER' 0.18 1.42 0.71 0.73
    102.0 1 'SUMMER' 1.83 1.82 3.11 1.79
    103.0 0 'SUMMER' 0.08 0.4 0.57 0.19
    104.0 1 'SUMMER' 0.0 0.04 0.04 0.0
    105.0 1 'SUMMER' 0.83 0.38 0.7 0.44
    106.0 0 'SUMMER' 0.01 0.44 0.66 0.31
    107.0 1 'SUMMER' 2.65 0.85 1.48 0.96
    108.0 0 'SUMMER' 1.27 1.39 1.2 1.04
    109.0 1 'AUTUMN' 0.01 0.23 0.1 0.05
    110.0 0 'AUTUMN' 0.35 0.75 0.2 0.04
    111.0 1 'AUTUMN' 1.8 1.62 3.02 1.83
    112.0 0 'AUTUMN' 4.44 1.05 3.59 2.24
    113.0 1 'AUTUMN' 2.84 2.44 4.48 2.5
    114.0 0 'AUTUMN' 2.05 1.3 4.04 1.1
    115.0 1 'AUTUMN' 3.01 1.66 4.56 1.83
    116.0 0 'AUTUMN' 2.58 1.21 3.95 1.41
    117.0 0 'WINTER' 2.22 0.61 2.68 0.74
    118.0 1 'WINTER' 0.07 2.26 2.08 1.09
    119.0 1 'WINTER' 1.62 1.16 2.87 0.79
    120.0 0 'WINTER' 4.34 3.29 6.4 4.06
    121.0 0 'WINTER' 1.03 0.58 1.77 0.4
    122.0 1 'WINTER' 1.5 0.41 2.56 0.76
    123.0 1 'SPRING' 1.52 1.62 2.86 1.53
    124.0 0 'SPRING' 0.37 1.25 1.74 0.56
    125.0 0 'SPRING' 2.14 1.0 4.39 1.74
    126.0 1 'SPRING' 2.36 1.53 3.03 1.59
    127.0 0 'SPRING' 1.71 2.03 3.24 1.91
    128.0 1 'SPRING' 2.12 2.77 4.44 2.09
    129.0 0 'SUMMER' 1.38 2.11 3.01 1.59
    130.0 1 'SUMMER' 0.21 1.41 0.8 0.66
    131.0 0 'SUMMER' 0.48 0.59 0.68 0.68
    132.0 1 'SUMMER' 0.01 0.65 0.48 0.46
    133.0 1 'SUMMER' 0.15 0.13 0.42 0.22
    134.0 0 'SUMMER' 1.32 0.57 1.54 1.11
    135.0 1 'SUMMER' 2.26 1.04 1.27 1.76
    136.0 0 'SUMMER' 5.95 3.97 5.37 5.12
    171.0 0 'AUTUMN' 0.19 0.28 0.7 0.12
    172.0 1 'AUTUMN' 0.31 0.23 0.83 0.37
    173.0 1 'AUTUMN' 1.44 3.14 0.86 4.97
    174.0 0 'AUTUMN' 0.3 0.72 1.38 0.57
    175.0 1 'AUTUMN' 0.11 0.14 0.58 0.13
    176.0 0 'AUTUMN' 3.66 1.84 5.36 2.47
    177.0 0 'AUTUMN' 1.14 0.81 2.09 1.01
    178.0 1 'AUTUMN' 1.3 0.34 2.45 0.55
    179.0 1 'WINTER' 0.05 0.38 0.9 0.24
    180.0 0 'WINTER' 1.84 1.73 2.33 2.36
    181.0 1 'WINTER' 4.24 1.67 5.48 2.35
    182.0 0 'WINTER' 1.99 1.9 3.67 2.23
    183.0 0 'WINTER' 2.44 1.52 4.01 1.16
    184.0 1 'WINTER' 2.21 2.36 3.25 1.63
    185.0 1 'WINTER' 0.8 2.25 2.79 1.08
    186.0 0 'WINTER' 9.42 3.6 7.84 6.0
    187.0 1 'SPRING' 2.74 3.03 6.39 2.67
    188.0 0 'SPRING' 0.0 0.19 0.06 0.36
    189.0 1 'SPRING' 0.96 0.64 1.24 0.58
    190.0 0 'SPRING' 1.38 1.86 2.91 1.36
    191.0 1 'SPRING' 1.22 2.28 1.58 1.17
    192.0 0 'SPRING' 2.46 2.47 2.39 2.37
    193.0 1 'SPRING' 0.05 0.02 0.09 0.02
    194.0 0 'SPRING' 0.61 0.87 1.35 0.92

# Wrap the table in physical / logical data descriptors.
pd = PhysicalData('cloud')
ld = LogicalData(pd)

#------ Approximation function settings ------
fs = ApproximationFunctionSettings()
fs.logicalData = ld
fs.targetAttributeName = 'TE'
# 'season' and 'seeded' are marked inactive, leaving period, NC, SC and NWC
# as the model inputs (these are the variables listed in the final report).
fs.attributeUsageSet.getAttribute('season').setUsage(UsageOption.inactive)
fs.attributeUsageSet.getAttribute('seeded').setUsage(UsageOption.inactive)

#------ IRLS regression algorithm settings ------
# Named algoSettings rather than 'as': 'as' is a reserved word in
# Python 2.6+ / Jython 2.7 and cannot be used as an identifier there.
algoSettings = IRLSSettings()
algoSettings.preselection = TRUE
algoSettings.intercept = TRUE
vss = VariableSelectionSettings()
vss.variableSelectionMethod = VariableSelectionMethod.full
algoSettings.variableSelectionSettings = vss
fs.algorithmSettings = algoSettings

# The tasks below refer to these objects by their saved names.
save('cloud_pd', pd)
save('cloud_ld', ld)
save('reg_settings', fs)

#------ model building ------
bt = MiningBuildTask('cloud_pd', 'reg_settings', 'reg_model')
save('cloud_build', bt)
execute('cloud_build')
print "Build task for IRLS regression example was successfully executed"

#------ model testing ------
tt = ApproximationTestTask('cloud_pd', 'reg_model', 'cloud_out')
tt.testDataTargetAttributeName = 'TE'
save('cloud_test', tt)
execute('cloud_test')
print "Test task for IRLS regression example was successfully executed"

#----- model application ------
pdout = PhysicalData('cloud_apply')
save('cloud_pd_apply', pdout)

at = MiningApplyTask()
at.modelName = 'reg_model'
at.sourceDataName = 'cloud_pd'
at.targetDataName = 'cloud_pd_apply'
at.replaceExistingData = TRUE

# Map the source column TE to 'actual_target' in the apply output.
directMapping = java.util.ArrayList()
asi = ApplySourceItem()
asi.sourceName = 'TE'
asi.destinationName = 'actual_target'
directMapping.add(asi)
at.setDirectMapping(directMapping)

# Request the model's predicted value as 'predicted_target'.
ao = ApproximationApplyOutput()
aai = ApproximationOutputItem()
aai.setDestinationName('predicted_target')
aai.setOutputType(ApproximationOutputType.predictedValue)
ao.item.add(aai)
at.applyOutput = ao

save('cloud_apply', at)
execute('cloud_apply')
print "Apply task for IRLS regression example was successfully executed"
print

# print model fit
# The model is loaded once; both reports read from the same statistics object.
regModel = load('reg_model')
modelStats = regModel.getModelStatistics()
statNames = modelStats.getModelStatNames()
print "Model statistics \t value"
for row in range(len(statNames)):
    print statNames[row], " \t ", modelStats.getModelStatValue(row)
print

# print per-variable coefficients and variance inflation factors
varStats = modelStats.getVariableStatistics()
print "Variable \t Coeff \t VIF"
for v in varStats.getNames():
    print v, "\t", varStats.getVarStatValue(v,"Coeff"), "\t", varStats.getVarStatValue(v,"VIF")
Output
Build task for IRLS regression example was successfully executed
Test task for IRLS regression example was successfully executed
Apply task for IRLS regression example was successfully executed

Model statistics value
dfR 4.0
SSR 92.55068427595378
MSR 23.137671068988446
dfE 103.0
SSE 6.71296171838018
MSE 0.06517438561534156
dfT 107.0
SST 99.26364599433397
F-test 355.011725090079
Pr>F 0.0
s 0.255292744932835
Rsq 0.9323724043063725
ADJRsq 0.9297460899105034

Variable Coeff VIF
Intercept -0.066814006025922 NaN
period 4.707606205758679E-4 1.1405808437704104
NC 0.472707106434072 4.1130343485278
SC 0.637350132259273 2.777231115978932
NWC -0.10911174793879069 5.334078732767622