{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "library(plyr)      # ddply(), dlply()\n", "library(ggplot2)   # ggplot() and geoms\n", "library(reshape2)  # melt()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "setwd(\"/media/sf_share/linux/car\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# vehicles <- read.csv(unz(\"vehicles.csv.zip\", \"vehicles.csv\"), stringsAs Factors = F)\n", "vehicles = read.csv(\"input/vehicles.csv\",)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
barrels08barrelsA08charge120charge240city08city08UcityA08cityA08UcityCDcityEmfrCodec240Dscrcharge240bc240bDscrcreatedOnmodifiedOnstartStopphevCityphevHwyphevComb
15.69571 0 0 0 19 0 0 0 0 0 0 Tue Jan 01 00:00:00 EST 2013Tue Jan 01 00:00:00 EST 2013 0 0 0
29.96455 0 0 0 9 0 0 0 0 0 0 Tue Jan 01 00:00:00 EST 2013Tue Jan 01 00:00:00 EST 2013 0 0 0
12.20778 0 0 0 23 0 0 0 0 0 0 Tue Jan 01 00:00:00 EST 2013Tue Jan 01 00:00:00 EST 2013 0 0 0
29.96455 0 0 0 10 0 0 0 0 0 0 Tue Jan 01 00:00:00 EST 2013Tue Jan 01 00:00:00 EST 2013 0 0 0
17.34789 0 0 0 17 0 0 0 0 0 0 Tue Jan 01 00:00:00 EST 2013Tue Jan 01 00:00:00 EST 2013 0 0 0
14.98227 0 0 0 21 0 0 0 0 0 0 Tue Jan 01 00:00:00 EST 2013Tue Jan 01 00:00:00 EST 2013 0 0 0
\n" ], "text/latex": [ "\\begin{tabular}{r|lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll}\n", " barrels08 & barrelsA08 & charge120 & charge240 & city08 & city08U & cityA08 & cityA08U & cityCD & cityE & ⋯ & mfrCode & c240Dscr & charge240b & c240bDscr & createdOn & modifiedOn & startStop & phevCity & phevHwy & phevComb\\\\\n", "\\hline\n", "\t 15.69571 & 0 & 0 & 0 & 19 & 0 & 0 & 0 & 0 & 0 & ⋯ & & & 0 & & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 & & 0 & 0 & 0 \\\\\n", "\t 29.96455 & 0 & 0 & 0 & 9 & 0 & 0 & 0 & 0 & 0 & ⋯ & & & 0 & & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 & & 0 & 0 & 0 \\\\\n", "\t 12.20778 & 0 & 0 & 0 & 23 & 0 & 0 & 0 & 0 & 0 & ⋯ & & & 0 & & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 & & 0 & 0 & 0 \\\\\n", "\t 29.96455 & 0 & 0 & 0 & 10 & 0 & 0 & 0 & 0 & 0 & ⋯ & & & 0 & & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 & & 0 & 0 & 0 \\\\\n", "\t 17.34789 & 0 & 0 & 0 & 17 & 0 & 0 & 0 & 0 & 0 & ⋯ & & & 0 & & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 & & 0 & 0 & 0 \\\\\n", "\t 14.98227 & 0 & 0 & 0 & 21 & 0 & 0 & 0 & 0 & 0 & ⋯ & & & 0 & & Tue Jan 01 00:00:00 EST 2013 & Tue Jan 01 00:00:00 EST 2013 & & 0 & 0 & 0 \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "barrels08 | barrelsA08 | charge120 | charge240 | city08 | city08U | cityA08 | cityA08U | cityCD | cityE | ⋯ | mfrCode | c240Dscr | charge240b | c240bDscr | createdOn | modifiedOn | startStop | phevCity | phevHwy | phevComb | \n", "|---|---|---|---|---|---|\n", "| 15.69571 | 0 | 0 | 0 | 19 | 0 | 0 | 0 | 0 | 0 | ⋯ | | | 0 | | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 | | 0 | 0 | 0 | \n", "| 29.96455 | 0 | 0 | 0 | 9 | 0 | 0 | 0 | 0 | 0 | ⋯ | | | 0 | | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 | | 0 | 0 | 0 | \n", "| 12.20778 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | ⋯ | | | 0 | | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 | | 
0 | 0 | 0 | \n", "| 29.96455 | 0 | 0 | 0 | 10 | 0 | 0 | 0 | 0 | 0 | ⋯ | | | 0 | | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 | | 0 | 0 | 0 | \n", "| 17.34789 | 0 | 0 | 0 | 17 | 0 | 0 | 0 | 0 | 0 | ⋯ | | | 0 | | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 | | 0 | 0 | 0 | \n", "| 14.98227 | 0 | 0 | 0 | 21 | 0 | 0 | 0 | 0 | 0 | ⋯ | | | 0 | | Tue Jan 01 00:00:00 EST 2013 | Tue Jan 01 00:00:00 EST 2013 | | 0 | 0 | 0 | \n", "\n", "\n" ], "text/plain": [ " barrels08 barrelsA08 charge120 charge240 city08 city08U cityA08 cityA08U\n", "1 15.69571 0 0 0 19 0 0 0 \n", "2 29.96455 0 0 0 9 0 0 0 \n", "3 12.20778 0 0 0 23 0 0 0 \n", "4 29.96455 0 0 0 10 0 0 0 \n", "5 17.34789 0 0 0 17 0 0 0 \n", "6 14.98227 0 0 0 21 0 0 0 \n", " cityCD cityE ⋯ mfrCode c240Dscr charge240b c240bDscr\n", "1 0 0 ⋯ 0 \n", "2 0 0 ⋯ 0 \n", "3 0 0 ⋯ 0 \n", "4 0 0 ⋯ 0 \n", "5 0 0 ⋯ 0 \n", "6 0 0 ⋯ 0 \n", " createdOn modifiedOn startStop phevCity\n", "1 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013 0 \n", "2 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013 0 \n", "3 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013 0 \n", "4 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013 0 \n", "5 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013 0 \n", "6 Tue Jan 01 00:00:00 EST 2013 Tue Jan 01 00:00:00 EST 2013 0 \n", " phevHwy phevComb\n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "5 0 0 \n", "6 0 0 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "39270" ], "text/latex": [ "39270" ], "text/markdown": [ "39270" ], "text/plain": [ "[1] 39270" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "83" ], "text/latex": [ "83" ], "text/markdown": [ "83" ], "text/plain": [ "[1] 83" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    \n", "\t
  1. 'barrels08'
  2. \n", "\t
  3. 'barrelsA08'
  4. \n", "\t
  5. 'charge120'
  6. \n", "\t
  7. 'charge240'
  8. \n", "\t
  9. 'city08'
  10. \n", "\t
  11. 'city08U'
  12. \n", "\t
  13. 'cityA08'
  14. \n", "\t
  15. 'cityA08U'
  16. \n", "\t
  17. 'cityCD'
  18. \n", "\t
  19. 'cityE'
  20. \n", "\t
  21. 'cityUF'
  22. \n", "\t
  23. 'co2'
  24. \n", "\t
  25. 'co2A'
  26. \n", "\t
  27. 'co2TailpipeAGpm'
  28. \n", "\t
  29. 'co2TailpipeGpm'
  30. \n", "\t
  31. 'comb08'
  32. \n", "\t
  33. 'comb08U'
  34. \n", "\t
  35. 'combA08'
  36. \n", "\t
  37. 'combA08U'
  38. \n", "\t
  39. 'combE'
  40. \n", "\t
  41. 'combinedCD'
  42. \n", "\t
  43. 'combinedUF'
  44. \n", "\t
  45. 'cylinders'
  46. \n", "\t
  47. 'displ'
  48. \n", "\t
  49. 'drive'
  50. \n", "\t
  51. 'engId'
  52. \n", "\t
  53. 'eng_dscr'
  54. \n", "\t
  55. 'feScore'
  56. \n", "\t
  57. 'fuelCost08'
  58. \n", "\t
  59. 'fuelCostA08'
  60. \n", "\t
  61. 'fuelType'
  62. \n", "\t
  63. 'fuelType1'
  64. \n", "\t
  65. 'ghgScore'
  66. \n", "\t
  67. 'ghgScoreA'
  68. \n", "\t
  69. 'highway08'
  70. \n", "\t
  71. 'highway08U'
  72. \n", "\t
  73. 'highwayA08'
  74. \n", "\t
  75. 'highwayA08U'
  76. \n", "\t
  77. 'highwayCD'
  78. \n", "\t
  79. 'highwayE'
  80. \n", "\t
  81. 'highwayUF'
  82. \n", "\t
  83. 'hlv'
  84. \n", "\t
  85. 'hpv'
  86. \n", "\t
  87. 'id'
  88. \n", "\t
  89. 'lv2'
  90. \n", "\t
  91. 'lv4'
  92. \n", "\t
  93. 'make'
  94. \n", "\t
  95. 'model'
  96. \n", "\t
  97. 'mpgData'
  98. \n", "\t
  99. 'phevBlended'
  100. \n", "\t
  101. 'pv2'
  102. \n", "\t
  103. 'pv4'
  104. \n", "\t
  105. 'range'
  106. \n", "\t
  107. 'rangeCity'
  108. \n", "\t
  109. 'rangeCityA'
  110. \n", "\t
  111. 'rangeHwy'
  112. \n", "\t
  113. 'rangeHwyA'
  114. \n", "\t
  115. 'trany'
  116. \n", "\t
  117. 'UCity'
  118. \n", "\t
  119. 'UCityA'
  120. \n", "\t
  121. 'UHighway'
  122. \n", "\t
  123. 'UHighwayA'
  124. \n", "\t
  125. 'VClass'
  126. \n", "\t
  127. 'year'
  128. \n", "\t
  129. 'youSaveSpend'
  130. \n", "\t
  131. 'guzzler'
  132. \n", "\t
  133. 'trans_dscr'
  134. \n", "\t
  135. 'tCharger'
  136. \n", "\t
  137. 'sCharger'
  138. \n", "\t
  139. 'atvType'
  140. \n", "\t
  141. 'fuelType2'
  142. \n", "\t
  143. 'rangeA'
  144. \n", "\t
  145. 'evMotor'
  146. \n", "\t
  147. 'mfrCode'
  148. \n", "\t
  149. 'c240Dscr'
  150. \n", "\t
  151. 'charge240b'
  152. \n", "\t
  153. 'c240bDscr'
  154. \n", "\t
  155. 'createdOn'
  156. \n", "\t
  157. 'modifiedOn'
  158. \n", "\t
  159. 'startStop'
  160. \n", "\t
  161. 'phevCity'
  162. \n", "\t
  163. 'phevHwy'
  164. \n", "\t
  165. 'phevComb'
  166. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'barrels08'\n", "\\item 'barrelsA08'\n", "\\item 'charge120'\n", "\\item 'charge240'\n", "\\item 'city08'\n", "\\item 'city08U'\n", "\\item 'cityA08'\n", "\\item 'cityA08U'\n", "\\item 'cityCD'\n", "\\item 'cityE'\n", "\\item 'cityUF'\n", "\\item 'co2'\n", "\\item 'co2A'\n", "\\item 'co2TailpipeAGpm'\n", "\\item 'co2TailpipeGpm'\n", "\\item 'comb08'\n", "\\item 'comb08U'\n", "\\item 'combA08'\n", "\\item 'combA08U'\n", "\\item 'combE'\n", "\\item 'combinedCD'\n", "\\item 'combinedUF'\n", "\\item 'cylinders'\n", "\\item 'displ'\n", "\\item 'drive'\n", "\\item 'engId'\n", "\\item 'eng\\_dscr'\n", "\\item 'feScore'\n", "\\item 'fuelCost08'\n", "\\item 'fuelCostA08'\n", "\\item 'fuelType'\n", "\\item 'fuelType1'\n", "\\item 'ghgScore'\n", "\\item 'ghgScoreA'\n", "\\item 'highway08'\n", "\\item 'highway08U'\n", "\\item 'highwayA08'\n", "\\item 'highwayA08U'\n", "\\item 'highwayCD'\n", "\\item 'highwayE'\n", "\\item 'highwayUF'\n", "\\item 'hlv'\n", "\\item 'hpv'\n", "\\item 'id'\n", "\\item 'lv2'\n", "\\item 'lv4'\n", "\\item 'make'\n", "\\item 'model'\n", "\\item 'mpgData'\n", "\\item 'phevBlended'\n", "\\item 'pv2'\n", "\\item 'pv4'\n", "\\item 'range'\n", "\\item 'rangeCity'\n", "\\item 'rangeCityA'\n", "\\item 'rangeHwy'\n", "\\item 'rangeHwyA'\n", "\\item 'trany'\n", "\\item 'UCity'\n", "\\item 'UCityA'\n", "\\item 'UHighway'\n", "\\item 'UHighwayA'\n", "\\item 'VClass'\n", "\\item 'year'\n", "\\item 'youSaveSpend'\n", "\\item 'guzzler'\n", "\\item 'trans\\_dscr'\n", "\\item 'tCharger'\n", "\\item 'sCharger'\n", "\\item 'atvType'\n", "\\item 'fuelType2'\n", "\\item 'rangeA'\n", "\\item 'evMotor'\n", "\\item 'mfrCode'\n", "\\item 'c240Dscr'\n", "\\item 'charge240b'\n", "\\item 'c240bDscr'\n", "\\item 'createdOn'\n", "\\item 'modifiedOn'\n", "\\item 'startStop'\n", "\\item 'phevCity'\n", "\\item 'phevHwy'\n", "\\item 'phevComb'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'barrels08'\n", "2. 
'barrelsA08'\n", "3. 'charge120'\n", "4. 'charge240'\n", "5. 'city08'\n", "6. 'city08U'\n", "7. 'cityA08'\n", "8. 'cityA08U'\n", "9. 'cityCD'\n", "10. 'cityE'\n", "11. 'cityUF'\n", "12. 'co2'\n", "13. 'co2A'\n", "14. 'co2TailpipeAGpm'\n", "15. 'co2TailpipeGpm'\n", "16. 'comb08'\n", "17. 'comb08U'\n", "18. 'combA08'\n", "19. 'combA08U'\n", "20. 'combE'\n", "21. 'combinedCD'\n", "22. 'combinedUF'\n", "23. 'cylinders'\n", "24. 'displ'\n", "25. 'drive'\n", "26. 'engId'\n", "27. 'eng_dscr'\n", "28. 'feScore'\n", "29. 'fuelCost08'\n", "30. 'fuelCostA08'\n", "31. 'fuelType'\n", "32. 'fuelType1'\n", "33. 'ghgScore'\n", "34. 'ghgScoreA'\n", "35. 'highway08'\n", "36. 'highway08U'\n", "37. 'highwayA08'\n", "38. 'highwayA08U'\n", "39. 'highwayCD'\n", "40. 'highwayE'\n", "41. 'highwayUF'\n", "42. 'hlv'\n", "43. 'hpv'\n", "44. 'id'\n", "45. 'lv2'\n", "46. 'lv4'\n", "47. 'make'\n", "48. 'model'\n", "49. 'mpgData'\n", "50. 'phevBlended'\n", "51. 'pv2'\n", "52. 'pv4'\n", "53. 'range'\n", "54. 'rangeCity'\n", "55. 'rangeCityA'\n", "56. 'rangeHwy'\n", "57. 'rangeHwyA'\n", "58. 'trany'\n", "59. 'UCity'\n", "60. 'UCityA'\n", "61. 'UHighway'\n", "62. 'UHighwayA'\n", "63. 'VClass'\n", "64. 'year'\n", "65. 'youSaveSpend'\n", "66. 'guzzler'\n", "67. 'trans_dscr'\n", "68. 'tCharger'\n", "69. 'sCharger'\n", "70. 'atvType'\n", "71. 'fuelType2'\n", "72. 'rangeA'\n", "73. 'evMotor'\n", "74. 'mfrCode'\n", "75. 'c240Dscr'\n", "76. 'charge240b'\n", "77. 'c240bDscr'\n", "78. 'createdOn'\n", "79. 'modifiedOn'\n", "80. 'startStop'\n", "81. 'phevCity'\n", "82. 'phevHwy'\n", "83. 
'phevComb'\n", "\n", "\n" ], "text/plain": [ " [1] \"barrels08\" \"barrelsA08\" \"charge120\" \"charge240\" \n", " [5] \"city08\" \"city08U\" \"cityA08\" \"cityA08U\" \n", " [9] \"cityCD\" \"cityE\" \"cityUF\" \"co2\" \n", "[13] \"co2A\" \"co2TailpipeAGpm\" \"co2TailpipeGpm\" \"comb08\" \n", "[17] \"comb08U\" \"combA08\" \"combA08U\" \"combE\" \n", "[21] \"combinedCD\" \"combinedUF\" \"cylinders\" \"displ\" \n", "[25] \"drive\" \"engId\" \"eng_dscr\" \"feScore\" \n", "[29] \"fuelCost08\" \"fuelCostA08\" \"fuelType\" \"fuelType1\" \n", "[33] \"ghgScore\" \"ghgScoreA\" \"highway08\" \"highway08U\" \n", "[37] \"highwayA08\" \"highwayA08U\" \"highwayCD\" \"highwayE\" \n", "[41] \"highwayUF\" \"hlv\" \"hpv\" \"id\" \n", "[45] \"lv2\" \"lv4\" \"make\" \"model\" \n", "[49] \"mpgData\" \"phevBlended\" \"pv2\" \"pv4\" \n", "[53] \"range\" \"rangeCity\" \"rangeCityA\" \"rangeHwy\" \n", "[57] \"rangeHwyA\" \"trany\" \"UCity\" \"UCityA\" \n", "[61] \"UHighway\" \"UHighwayA\" \"VClass\" \"year\" \n", "[65] \"youSaveSpend\" \"guzzler\" \"trans_dscr\" \"tCharger\" \n", "[69] \"sCharger\" \"atvType\" \"fuelType2\" \"rangeA\" \n", "[73] \"evMotor\" \"mfrCode\" \"c240Dscr\" \"charge240b\" \n", "[77] \"c240bDscr\" \"createdOn\" \"modifiedOn\" \"startStop\" \n", "[81] \"phevCity\" \"phevHwy\" \"phevComb\" " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "head(vehicles)\n", "nrow(vehicles) \n", "ncol(vehicles)\n", "names(vehicles)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "35" ], "text/latex": [ "35" ], "text/markdown": [ "35" ], "text/plain": [ "[1] 35" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "1984" ], "text/latex": [ "1984" ], "text/markdown": [ "1984" ], "text/plain": [ "[1] 1984" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "2018" ], "text/latex": [ "2018" ], "text/markdown": [ "2018" ], "text/plain": [ "[1] 2018" ] },
"metadata": {}, "output_type": "display_data" } ], "source": [ "length(unique(vehicles[, \"year\"]))\n", "first_year <- min(vehicles[, \"year\"])\n", "first_year\n", "## 1984\n", "\n", "last_year <- max(vehicles[, \"year\"])\n", "last_year\n", "## 2018\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "35" ], "text/latex": [ "35" ], "text/markdown": [ "35" ], "text/plain": [ "[1] 35" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "length(unique(vehicles$year))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\n", " Diesel Electricity Midgrade Gasoline Natural Gas \n", " 1103 145 90 60 \n", " Premium Gasoline Regular Gasoline \n", " 10852 27020 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "table(vehicles$fuelType1)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# Blank transmission entries are recorded as missing values\n", "vehicles$trany[vehicles$trany == \"\"] <- NA" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Collapse the detailed labels to Auto / Manual; rows with NA trany stay NA\n", "vehicles$trany2 <- ifelse(substr(vehicles$trany, 1, 4) ==\n", " \"Auto\", \"Auto\", \"Manual\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\n", " Auto Manual \n", " 26666 12593 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Convert the derived column (trany2), not the raw one, to a factor\n", "vehicles$trany2 <- as.factor(vehicles$trany2)\n", "table(vehicles$trany2)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " year\n", "sCharger 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997\n", " 1964 1701 1210 1247 1130 1149 1074 1130 1116 1088 979 962 767 757\n", " S 0 0 0 0 0 4 4 2 5 5 3 5 6 5\n", " year\n", "sCharger 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011\n", " 800 840 826 891 949 1015 1089 1136 1067 1098 1152 1163 1089 1103\n", " S 
12 12 14 20 26 29 33 30 37 28 35 19 18 25\n", " year\n", "sCharger 2012 2013 2014 2015 2016 2017 2018\n", " 1122 1142 1152 1214 1192 1222 995\n", " S 28 42 65 60 61 63 43" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "with(vehicles, table(sCharger, year))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "'factor'" ], "text/latex": [ "'factor'" ], "text/markdown": [ "'factor'" ], "text/plain": [ "[1] \"factor\"" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    \n", "\t
  1. \n", "\t
  2. S
  3. \n", "
\n", "\n", "
\n", "\t\n", "\t\tLevels:\n", "\t\n", "\t
    \n", "\t\t
  1. ''
  2. \n", "\t\t
  3. 'S'
  4. \n", "\t
\n", "
" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item \n", "\\item S\n", "\\end{enumerate*}\n", "\n", "\\emph{Levels}: \\begin{enumerate*}\n", "\\item ''\n", "\\item 'S'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. \n", "2. S\n", "\n", "\n", "\n", "**Levels**: 1. ''\n", "2. 'S'\n", "\n", "\n" ], "text/plain": [ "[1] S\n", "Levels: S" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "'logical'" ], "text/latex": [ "'logical'" ], "text/markdown": [ "'logical'" ], "text/plain": [ "[1] \"logical\"" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    \n", "\t
  1. <NA>
  2. \n", "\t
  3. TRUE
  4. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item \n", "\\item TRUE\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. <NA>\n", "2. TRUE\n", "\n", "\n" ], "text/plain": [ "[1] NA TRUE" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# sCharger\n", "class(vehicles$sCharger)\n", "unique(vehicles$sCharger)\n", "\n", "# tCharger\n", "class(vehicles$tCharger)\n", "unique(vehicles$tCharger)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Average combined / highway / city MPG per model year (ddply comes from plyr)\n", "mpgByYr <- ddply(vehicles, ~year, summarise,\n", "                 avgMPG = mean(comb08),\n", "                 avgHghy = mean(highway08),\n", "                 avgCity = mean(city08))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ggplot(mpgByYr, aes(year, avgMPG)) + geom_point() +\n", "  geom_smooth() + xlab(\"Year\") + ylab(\"Average MPG\") +\n", "  ggtitle(\"All cars\")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "table(vehicles$fuelType1)\n", "\n", "# Gasoline-only cars: no secondary fuel and not a hybrid.\n", "# Each fuel name must be one unbroken string literal; a line break inside the\n", "# quotes becomes part of the value and that fuel type then never matches.\n", "gasCars <- subset(vehicles,\n", "                  fuelType1 %in% c(\"Regular Gasoline\", \"Premium Gasoline\",\n", "                                   \"Midgrade Gasoline\") &\n", "                    fuelType2 == \"\" & atvType != \"Hybrid\")\n", "mpgByYr_Gas <- ddply(gasCars, ~year, summarise, avgMPG = mean(comb08))\n", "ggplot(mpgByYr_Gas, aes(year, avgMPG)) + geom_point() +\n", "  geom_smooth() + xlab(\"Year\") + ylab(\"Average MPG\") +\n", "  ggtitle(\"Gasoline cars\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "typeof(gasCars$displ)\n", "\n", "# Go through as.character() so the conversion is also right if displ was read\n", "# as a factor (as.numeric() on a factor returns the level codes, not the values).\n", "gasCars$displ <- as.numeric(as.character(gasCars$displ))\n", "ggplot(gasCars, aes(displ, comb08)) + geom_point() +\n", "  geom_smooth()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# na.rm = TRUE: one missing displacement must not turn a whole year's mean into NA\n", "avgCarSize <- ddply(gasCars, ~year, summarise,\n", "                    avgDispl = mean(displ, na.rm = TRUE))\n", "ggplot(avgCarSize, aes(year, avgDispl)) + geom_point() +\n", "  geom_smooth() + xlab(\"Year\") +\n", "  ylab(\"Average engine displacement (l)\")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "byYear <- ddply(gasCars, ~year, summarise,\n", "                avgMPG = mean(comb08),\n", "                avgDispl = mean(displ, na.rm = TRUE))\n", "head(byYear)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Long format (one row per year and metric) so both series can be faceted;\n", "# melt comes from reshape2\n", "byYear2 <- melt(byYear, id = \"year\")\n", "levels(byYear2$variable) <- c(\"Average MPG\", \"Avg engine displacement\")\n", "head(byYear2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ggplot(byYear2, aes(year, value)) + geom_point() +\n", "  geom_smooth() +\n", "  facet_wrap(~variable, ncol = 1, scales = \"free_y\") +\n", "  xlab(\"Year\") + ylab(\"\")"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Four-cylinder gasoline cars only\n", "gasCars4 <- subset(gasCars, cylinders == \"4\")\n", "\n", "# Every `+` has to end a line: a line that starts with `+` is parsed as a new\n", "# statement, so the layers after it would never be added to the plot.\n", "ggplot(gasCars4, aes(factor(year), comb08)) + geom_boxplot() +\n", "  facet_wrap(~trany2, ncol = 1) +\n", "  theme(axis.text.x = element_text(angle = 45)) +\n", "  labs(x = \"Year\", y = \"MPG\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Share of automatic vs manual transmissions per year; dashed line marks 50%\n", "ggplot(gasCars4, aes(factor(year), fill = factor(trany2))) +\n", "  geom_bar(position = \"fill\") +\n", "  labs(x = \"Year\", y = \"Proportion of cars\", fill = \"Transmission\") +\n", "  theme(axis.text.x = element_text(angle = 45)) +\n", "  geom_hline(yintercept = 0.5, linetype = 2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "carsMake <- ddply(gasCars4, ~year, summarise,\n", "                  numberOfMakes = length(unique(make)))\n", "\n", "ggplot(carsMake, aes(year, numberOfMakes)) + geom_point() +\n", "  labs(x = \"Year\", y = \"Number of available makes\") +\n", "  ggtitle(\"Four cylinder cars\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Makes that appear among the four-cylinder gasoline cars of every model year\n", "uniqMakes <- dlply(gasCars4, ~year, function(x) unique(x$make))\n", "commonMakes <- Reduce(intersect, uniqMakes)\n", "commonMakes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "carsCommonMakes4 <- subset(gasCars4, make %in% commonMakes)\n", "avgMPG_commonMakes <- ddply(carsCommonMakes4, ~year + make, summarise,\n", "                            avgMPG = mean(comb08))\n", "\n", "ggplot(avgMPG_commonMakes, aes(year, avgMPG)) + geom_line() +\n", "  facet_wrap(~make, nrow = 3)" ] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.2.3" } }, "nbformat": 4, "nbformat_minor": 2 }