Data Report

data from Prof. Kim Soo Yong

Created by

Rischan Mafrur

Chonnam National University of South Korea

May 25, 2014

User classification based on Broadcast log data. Load the dataset

# Read the broadcast log table and list its columns.
# `stringsAsFactors = TRUE` keeps the text label columns (`user`,
# `personality`) as factors on R >= 4.0, where read.csv() otherwise returns
# character columns; randomForest() needs a factor response to fit a
# classification forest.
broadcast <- read.csv("broadcast.csv", header = TRUE, stringsAsFactors = TRUE)
names(broadcast)
##  [1] "sumcallin"     "caountcallin"  "sumcallout"    "countcallout" 
##  [5] "sumreceived"   "countreceived" "sumsent"       "countsent"    
##  [9] "user"          "personality"

Remove the personality column

# Drop the `personality` label so only `user` remains as a target column.
broadcast[["personality"]] <- NULL

Using randomForest for classification

library(randomForest)
# Fit a random forest that predicts `user` from the eight call/message
# aggregate columns.
# The formula uses bare column names that are resolved through
# `data = broadcast`. Terms written as `broadcast$col` bypass the `data`
# argument and are looked up in the global `broadcast` object, so
# predict(fit, newdata = ...) would not use the columns of `newdata`.
# NOTE: the printed `Call:` and the importance row names follow the formula
# terms, so re-knit the report to refresh the recorded output.
# A fixed seed makes the bootstrap samples, and with them the OOB error
# estimate, repeatable between runs.
set.seed(2014)
fit <- randomForest(
  user ~ sumcallin + caountcallin + sumcallout + countcallout +
    sumreceived + countreceived + sumsent + countsent,
  data = broadcast
)

Printing the model information and the confusion matrix result for user classification

# Show the fitted forest's summary: the call, forest type, number of trees,
# OOB error estimate and the per-class confusion matrix.
print(fit)
## 
## Call:
##  randomForest(formula = broadcast$user ~ broadcast$sumcallin +      broadcast$caountcallin + broadcast$sumcallout + broadcast$countcallout +      broadcast$sumreceived + broadcast$countreceived + broadcast$sumsent +      broadcast$countsent, data = broadcast) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 76.78%
## Confusion matrix:
##             eight eighteen eleven five fiveteen fourteen nine one
## eight          13        0      0    0        0        4    0   2
## eighteen        0       14      0    0        0        0    4   0
## eleven          0        0     12    0        4        1    0   4
## five            2        1      2    3        2        3    0   1
## fiveteen        0        0      3    0       20        2    0   2
## fourteen        4        0      0    2        2       13    0   4
## nine            1        6      0    0        0        1    3   0
## one             1        1      4    1        2        6    0  11
## seventeen       0        4      0    1        1        0    1   0
## six             1        2      2    0        0        3    2   2
## ten             0        0      1    1        0        1    0   0
## thirty          0        5      0    1        1        0    1   0
## thirtyfive      0        5      0    1        0        0    1   0
## thirtyfour      0        2      5    0        5        0    1   1
## thirtyone       0        4      0    1        0        0    0   0
## thirtysix       1        3      1    2        2        1    0   2
## thirtythree     5        0      1    1        3        3    1   0
## thirtytwo       2        3      0    0        0        6    1   1
## three           1        3      1    0        0        0    0   0
## twelve          1        0      0    1        0        1    1   0
## twenty          0        0      0    1        1        1    5   2
## twentyfive      2        2      1    1        1        0    1   0
## twentyfour      0        1      3    1        4        0    1   0
## twentynine      0        0      1    0        0        2    0   1
## twentyseven     0        3      0    0        1        0    4   0
##             seventeen six ten thirty thirtyfive thirtyfour thirtyone
## eight               0   1   0      0          0          0         0
## eighteen            1   2   1      3          2          2         1
## eleven              0   1   1      0          0          1         0
## five                0   1   1      3          0          2         4
## fiveteen            1   0   0      0          0          0         0
## fourteen            0   0   0      0          0          0         1
## nine                1   2   1      2          2          2         0
## one                 0   2   0      0          0          3         2
## seventeen           1   4   1      1          2          0         2
## six                 2   5   1      0          3          2         2
## ten                 3   2   2      3          2          0         2
## thirty              4   2   2      0          4          1         2
## thirtyfive          1   3   1      0          6          5         3
## thirtyfour          0   1   1      0          3          3         0
## thirtyone           4   3   0      1          6          0         5
## thirtysix           1   0   1      0          0          2         0
## thirtythree         0   1   3      0          1          0         1
## thirtytwo           1   2   3      0          1          2         2
## three               1   0   1      0          1          1         1
## twelve              0   0   0      0          1          1         1
## twenty              6   1   0      2          0          4         0
## twentyfive          2   1   1      0          1          0         0
## twentyfour          1   0   1      2          0          1         0
## twentynine          0   0   0      0          0          1         1
## twentyseven         1   0   3      1          0          2         0
##             thirtysix thirtythree thirtytwo three twelve twenty twentyfive
## eight               2           5         0     2      0      1          2
## eighteen            0           0         1     0      0      1          0
## eleven              0           0         2     1      0      0          2
## five                3           0         0     2      0      0          3
## fiveteen            0           3         0     1      0      1          0
## fourteen            0           6         1     0      0      0          2
## nine                1           0         1     0      1      4          1
## one                 2           0         0     0      0      0          0
## seventeen           1           1         0     0      1      6          1
## six                 0           1         1     0      1      2          1
## ten                 0           2         1     0      0      4          1
## thirty              1           0         1     2      1      2          0
## thirtyfive          1           1         0     1      0      2          0
## thirtyfour          2           0         1     1      0      4          0
## thirtyone           0           1         1     1      2      1          0
## thirtysix           5           2         1     3      0      0          1
## thirtythree         0           5         0     2      3      1          1
## thirtytwo           1           1         0     3      1      2          0
## three               1           0         0    14      1      0          5
## twelve              0           3         1     0     14      5          1
## twenty              0           0         1     0      0      7          4
## twentyfive          0           2         0     8      2      1          3
## twentyfour          3           0         0     3      0      0          0
## twentynine          0           1         1     0      0      2          1
## twentyseven         0           0         0     1      1      2          2
##             twentyfour twentynine twentyseven class.error
## eight                0          2           0      0.6176
## eighteen             0          0           4      0.6111
## eleven               2          1           0      0.6250
## five                 1          0           0      0.9118
## fiveteen             4          0           0      0.4595
## fourteen             0          0           0      0.6286
## nine                 0          2           2      0.9091
## one                  1          0           0      0.6944
## seventeen            1          1           4      0.9706
## six                  0          0           0      0.8485
## ten                  2          3           4      0.9412
## thirty               2          2           0      1.0000
## thirtyfive           2          0           0      0.8182
## thirtyfour           1          2           0      0.9091
## thirtyone            0          1           1      0.8438
## thirtysix            3          0           3      0.8529
## thirtythree          3          3           0      0.8684
## thirtytwo            1          1           0      1.0000
## three                1          0           2      0.5882
## twelve               2          0           1      0.5882
## twenty               0          2           1      0.8158
## twentyfive           0          2           3      0.9118
## twentyfour          12          0           1      0.6471
## twentynine           0         21           2      0.3824
## twentyseven          0          5           7      0.7879
# Report each predictor's importance for the user model (MeanDecreaseGini).
importance(fit)
##                         MeanDecreaseGini
## broadcast$sumcallin               115.49
## broadcast$caountcallin             87.90
## broadcast$sumcallout              118.34
## broadcast$countcallout             84.90
## broadcast$sumreceived             119.62
## broadcast$countreceived            77.01
## broadcast$sumsent                  64.72
## broadcast$countsent                51.41

===========================================================================================
===========================================================================================

User classification based on broadcast log data and psychology data, with only two classes (introvert and extrovert). Load the dataset again

# Reload the broadcast log table (the `personality` column was removed from
# the previous copy) and list its columns.
# `stringsAsFactors = TRUE` keeps the text label columns (`user`,
# `personality`) as factors on R >= 4.0, where read.csv() otherwise returns
# character columns; randomForest() needs a factor response to fit a
# classification forest.
broadcast <- read.csv("broadcast.csv", header = TRUE, stringsAsFactors = TRUE)
names(broadcast)
##  [1] "sumcallin"     "caountcallin"  "sumcallout"    "countcallout" 
##  [5] "sumreceived"   "countreceived" "sumsent"       "countsent"    
##  [9] "user"          "personality"

Remove the user column from the dataset

# Drop the `user` label so only `personality` remains as a target column.
broadcast[["user"]] <- NULL

Using randomForest for classification

library(randomForest)
# Fit a random forest that predicts `personality` (extro / intro) from the
# eight call/message aggregate columns.
# The formula uses bare column names that are resolved through
# `data = broadcast`. Terms written as `broadcast$col` bypass the `data`
# argument and are looked up in the global `broadcast` object, so
# predict(fit, newdata = ...) would not use the columns of `newdata`.
# NOTE: the printed `Call:` and the importance row names follow the formula
# terms, so re-knit the report to refresh the recorded output.
# A fixed seed makes the bootstrap samples, and with them the OOB error
# estimate, repeatable between runs.
set.seed(2014)
fit <- randomForest(
  personality ~ sumcallin + caountcallin + sumcallout + countcallout +
    sumreceived + countreceived + sumsent + countsent,
  data = broadcast
)

Printing the model information and the confusion matrix result for user classification (introvert and extrovert)

# Show the fitted forest's summary: the call, forest type, number of trees,
# OOB error estimate and the extro/intro confusion matrix.
print(fit)
## 
## Call:
##  randomForest(formula = broadcast$personality ~ broadcast$sumcallin +      broadcast$caountcallin + broadcast$sumcallout + broadcast$countcallout +      broadcast$sumreceived + broadcast$countreceived + broadcast$sumsent +      broadcast$countsent, data = broadcast) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 37.34%
## Confusion matrix:
##       extro intro class.error
## extro   336   143      0.2985
## intro   177   201      0.4683
# Report each predictor's importance for the personality model
# (MeanDecreaseGini).
importance(fit)
##                         MeanDecreaseGini
## broadcast$sumcallin                63.60
## broadcast$caountcallin             45.63
## broadcast$sumcallout               68.66
## broadcast$countcallout             42.40
## broadcast$sumreceived              66.25
## broadcast$countreceived            36.78
## broadcast$sumsent                  35.25
## broadcast$countsent                27.08