Initialize R
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## Die folgenden Objekte sind maskiert von 'package:stats':
##
## filter, lag
##
## Die folgenden Objekte sind maskiert von 'package:base':
##
## intersect, setdiff, setequal, union
Read data
# Load the export file and, in the same step, replace its header with the
# six short column names used by the rest of the script.
data <- setNames(
  read.csv("Health Data.csv"),
  c("Start", "Finish", "cal", "distance", "hr", "steps")
)
Convert data
# Derive calendar columns from the text in `Start`.
#
# `Start` is parsed as "<day>-<month abbreviation>-<year>" after the first "."
# has been removed; the hour is the first two characters of whatever follows
# the last space.
# NOTE(review): "%b" is matched against the month abbreviations of the
# session's LC_TIME locale, so parsing still relies on that locale agreeing
# with the file (presumably German) -- confirm.
start_text <- sub("\\.", "", data$Start)
data$day <- as.Date(start_text, "%d-%b-%Y")
data$hour <- as.numeric(substring(sub(".* ", "", data$Start), 1, 2))

# Month and weekday labels are looked up by calendar index rather than taken
# from format(..., "%b") / weekdays(): those return locale-dependent text, and
# any label that does not match the fixed levels below would become NA.
month_ordered <- c("Jan", "Feb", "Mär", "Apr", "Mai", "Jun", "Jul", "Aug", "Sep", "Okt", "Nov", "Dez")
weekdays_ordered <- c("Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag", "Samstag", "Sonntag")
day_parts <- as.POSIXlt(data$day)

# POSIXlt months run 0-11.
data$month <- ordered(month_ordered[day_parts$mon + 1], levels = month_ordered)

# POSIXlt weekdays run 0-6 starting on Sunday; shift so that Monday is 1.
data$weekday <- ordered(
  weekdays_ordered[(day_parts$wday + 6) %% 7 + 1],
  levels = weekdays_ordered
)
rm(start_text, day_parts)
# Restrict the data to the study window and build the per-day summary and the
# per-row subsets used by the statistics and plots.
#
# Row masks go through which() so that an NA in a comparison (unparsed date,
# missing reading) drops the row; indexing with a logical NA would insert an
# all-NA row instead.
period_start <- as.Date("2015-05-01")
period_end <- as.Date("2015-08-31")
keep_cols <- c("day", "month", "hour", "weekday", "cal", "distance", "hr", "steps")
reldata <- data[which(data$day >= period_start & data$day <= period_end), keep_cols]

# Daily totals over every row of the window.
daily_totals <- summarise(
  group_by(reldata, day),
  steps = sum(steps),
  cal = sum(cal)
)

# Daily heart-rate figures over rows with hr > 0 only; `active` is the number
# of such rows per day.
daily_hr <- summarise(
  group_by(reldata[which(reldata$hr > 0), ], day),
  weekday = max(weekday),
  minhr = min(hr),
  maxhr = max(hr),
  meanhr = mean(hr),
  active = n()
)

# Inner join: days without any hr > 0 row do not appear in reldata_day.
# The key is named explicitly so the join does not have to guess it.
reldata_day <- inner_join(daily_totals, daily_hr, by = "day")
rm(period_start, period_end, keep_cols, daily_totals, daily_hr)

# Per-row subsets for the plots.
active_hoursHR <- reldata[which(reldata$hr > 0 & reldata$hr < 180), ]
active_hoursSteps <- reldata[which(reldata$steps > 0), ]
active_hoursCal <- reldata[which(reldata$cal > 0), ]
# %in% never yields NA, so no which() is needed here.
reldataWE <- reldata[reldata$weekday %in% c("Samstag", "Sonntag"), ]
Basic statistics
## [1] "Number of days used: 123"
## [1] "Average active hours per day: 14.8"
## [1] "Average activity calories per day: 451.7"
## [1] "Number of days I have not achieved activity goal of 400: 56 (46%)"
## [1] "Number of days I have not achieved 300: 31 (25%)"
## [1] "Number of days I overachieved my activity goal (2x400): 11"
## [1] "Most active day: 16.05.2015"
## [1] "Summary of HR:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 46.00 65.00 74.00 80.32 92.00 167.00
## [1] "Summary of steps"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.203 45.000 131.000 304.400 363.500 3568.000
Some Plots
# Number of rows with steps > 0 per weekday, one fill colour per weekday.
# The fill mapping must sit inside aes(); passed as a separate argument to
# ggplot() it is silently ignored.
ggplot(data = active_hoursSteps, aes(x = weekday, fill = weekday)) +
  geom_bar() +
  labs(title = "Active hours per weekday")
# Steps stacked per weekday, with each stacked segment outlined by month.
# The colour mapping must sit inside aes(); passed as a separate argument to
# ggplot() it is silently ignored.
ggplot(data = active_hoursSteps, aes(x = weekday, y = steps, color = month)) +
  geom_bar(stat = "identity") +
  labs(title = "Steps per weekday")
# Heart rate of every row in active_hoursHR against the date, with a
# smoothed trend drawn on top of the grey points.
ggplot(active_hoursHR, aes(x = day, y = hr)) +
  geom_point(colour = "grey") +
  stat_smooth() +
  labs(title = "average HR")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
# Steps of every row in active_hoursSteps against the date, with a smoothed
# trend drawn on top of the grey points.
ggplot(active_hoursSteps, aes(x = day, y = steps)) +
  geom_point(colour = "grey") +
  stat_smooth() +
  labs(title = "Average steps per hour")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
# Calories of every row in active_hoursCal against the date, with a smoothed
# trend drawn on top of the grey points.
ggplot(active_hoursCal, aes(x = day, y = cal)) +
  geom_point(colour = "grey") +
  stat_smooth() +
  labs(title = "Average calories per hour")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
# Box plot of calories per hour of day, with the individual rows overlaid as
# points.
# The y range is restricted with coord_cartesian(), which only zooms the view.
# ylim() would turn values outside 0-100 into NA before the box statistics
# are computed, so the quartiles would be based on truncated data.
ggplot(data = reldata, aes(x = factor(hour), y = cal)) +
  geom_boxplot(color = "red", fill = "lightgreen") +
  geom_point(color = "blue", alpha = .5) +
  coord_cartesian(ylim = c(0, 100)) +
  labs(title = "Activity calories per hour", x = "hour")
# Box plot of calories per hour of day for the weekend subset, with the
# individual rows overlaid as points.
# The y range is restricted with coord_cartesian(), which only zooms the view.
# ylim() would turn values outside 0-100 into NA before the box statistics
# are computed, so the quartiles would be based on truncated data.
ggplot(data = reldataWE, aes(x = factor(hour), y = cal)) +
  geom_boxplot(color = "red", fill = "lightgreen") +
  geom_point(color = "blue", alpha = .5) +
  coord_cartesian(ylim = c(0, 100)) +
  labs(title = "Activity calories per hour (weekend)", x = "hour")
# Daily totals per weekday as box plots with the individual days jittered on
# top. Written with ggplot() layers because qplot() is deprecated in current
# ggplot2 releases; layers, title and axis labels are the same.
ggplot(data = reldata_day, aes(x = weekday, y = steps)) +
  geom_boxplot() +
  geom_jitter() +
  labs(title = "Steps per weekday", x = "weekday", y = "Steps")

ggplot(data = reldata_day, aes(x = weekday, y = cal)) +
  geom_boxplot() +
  geom_jitter() +
  labs(title = "Activity calories per weekday", x = "weekday", y = "Cal")
# Daily mean (grey points, black trend), maximum (red) and minimum (green)
# heart rate over time, each with its own smoothed trend.
# Layers inherit x = day from the plot and only override y where needed.
# The view is limited to 50-150 with coord_cartesian(), which zooms without
# removing rows; ylim() would drop out-of-range days before the trends are
# fitted.
ggplot(data = reldata_day, aes(x = day, y = meanhr)) +
  geom_point(color = "darkgrey") +
  stat_smooth(color = "black") +
  geom_point(aes(y = maxhr), color = "red") +
  stat_smooth(aes(y = maxhr), color = "red", fill = "red") +
  geom_point(aes(y = minhr), color = "green") +
  stat_smooth(aes(y = minhr), color = "green", fill = "green", alpha = .2) +
  ggtitle("HR comparison - max(red) average(black) min(green)") +
  coord_cartesian(ylim = c(50, 150))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
# Per-day `active` count from reldata_day against the date, with a smoothed
# trend.
ggplot(reldata_day, aes(x = day, y = active)) +
  geom_point() +
  stat_smooth() +
  labs(title = "Active hours per day")
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
# Daily activity calories with a smoothed trend and a red horizontal
# reference line at 400.
# geom_hline() is the dedicated layer for a constant-y rule and spans the
# whole panel, unlike a geom_line() with y forced to a constant.
ggplot(data = reldata_day, aes(x = day, y = cal)) +
  geom_point() +
  stat_smooth() +
  geom_hline(yintercept = 400, color = "red") +
  ggtitle("Activity calories per day")
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.