# Student sample: download, clean and plot ----
#
# Downloads the student sample CSV, converts the Weight column to numeric
# (treating "-" as missing), drops records without a weight, adds a body
# mass index column and draws a few lattice plots conditioned on Gender.
#
# Results are wrapped in print() so that summaries and lattice plots also
# appear when the script is run via source(), where top-level values are
# not auto-printed.

library(lattice)

# Set URL path
students_url <-
  "http://ifgi.uni-muenster.de/~epebe_01/Geostatistics/student_sample.csv"

# Download locally; binary mode keeps the file byte-identical on all platforms
download.file(students_url, "students.csv", mode = "wb")
rm(students_url)

# Read into R
students <- read.csv("students.csv")
print(summary(students))
print(lapply(students, class))

# Convert Weight to character first: if it was read as a factor, calling
# as.numeric() directly would return the integer level codes, not the values.
students$Weight <- as.character(students$Weight)

# Replace "-" with NA ("not available", the missing value flag);
# %in% never yields NA, so the logical index is always well-defined.
students$Weight[students$Weight %in% "-"] <- NA

# Convert Weight to numeric
students$Weight <- as.numeric(students$Weight)

# Check classes again after the conversion
print(lapply(students, class))

# Remove records with a missing Weight
students <- students[!is.na(students$Weight), ]

# First plot: Weight against Length, one panel per Gender
print(
  xyplot(as.numeric(Weight) ~ as.numeric(Length) | Gender, data = students)
)

# Compute BMI.
# NOTE(review): the /100 presumably converts Length from cm to m and Weight
# is presumably in kg -- confirm against the data description. This also
# requires Length to be numeric; check the class listing printed above.
students$bmi <- students$Weight / (students$Length / 100)^2

print(histogram(~ Weight | Gender, data = students))
print(histogram(~ bmi | Gender, data = students))
print(summary(students))