Initialize R

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## Die folgenden Objekte sind maskiert von 'package:stats':
## 
##     filter, lag
## 
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     intersect, setdiff, setequal, union

Read data

# Load the exported health records and replace the header with short column
# names. The names are assigned by position.
data <- read.csv(file = "Health Data.csv")
names(data) <- c("Start", "Finish", "cal", "distance", "hr", "steps")

Convert data

# Derive calendar fields from the "Start" timestamp and keep only the
# analysis window (2015-05-01 to 2015-08-31, both ends inclusive).

# Drop the first "." in the timestamp, then parse the leading day-month-year
# part; as.Date() ignores any characters after those matched by the format.
# NOTE(review): "%b" is matched against the month abbreviations of the
# session locale, so parsing presumably needs a German LC_TIME - confirm.
data$day <- as.Date(sub("\\.", "", data$Start), format = "%d-%b-%Y")

# Hour of day: the first two characters after the last space in "Start".
data$hour <- as.numeric(substring(sub(".* ", "", data$Start), 1, 2))

# Month and weekday labels are looked up by numeric index instead of matching
# the output of format(day, "%b") / weekdays(day) against the German level
# names. That output follows the session locale, and any label that does not
# match a level silently becomes NA.
month_ordered <- c("Jan", "Feb", "Mär", "Apr", "Mai", "Jun",
                   "Jul", "Aug", "Sep", "Okt", "Nov", "Dez")
# POSIXlt months run 0-11.
data$month <- factor(month_ordered[as.POSIXlt(data$day)$mon + 1],
                     levels = month_ordered, ordered = TRUE)

weekdays_ordered <- c("Montag", "Dienstag", "Mittwoch", "Donnerstag",
                      "Freitag", "Samstag", "Sonntag")
# POSIXlt weekdays run 0-6 starting on Sunday; shift to Monday = 1 ... Sunday = 7.
data$weekday <- factor(
  weekdays_ordered[(as.POSIXlt(data$day)$wday + 6) %% 7 + 1],
  levels = weekdays_ordered, ordered = TRUE
)

# which() drops rows whose date failed to parse; indexing with a logical NA
# would otherwise add an all-NA row for each of them. The bounds are explicit
# Date values rather than strings coerced during the comparison.
reldata <- data[
  which(data$day >= as.Date("2015-05-01") & data$day <= as.Date("2015-08-31")),
  c("day", "month", "hour", "weekday", "cal", "distance", "hr", "steps")
]

# Collapse the hourly records to one row per day.
# The join key is named explicitly instead of letting inner_join() guess it
# from the shared column names, and both inputs are built inline so that no
# temporary objects have to be removed afterwards.
# Because this is an inner join, days without any hr > 0 record are dropped.
reldata_day <- inner_join(
  # Daily totals over all recorded hours.
  reldata %>%
    group_by(day) %>%
    summarise(steps = sum(steps), cal = sum(cal)),
  # Heart-rate statistics over the hours with hr > 0 (treated as active
  # hours); `active` is the number of such hours. filter() also discards
  # rows where hr is NA.
  reldata %>%
    filter(hr > 0) %>%
    group_by(day) %>%
    summarise(
      weekday = max(weekday),
      minhr = min(hr),
      maxhr = max(hr),
      meanhr = mean(hr),
      active = n()
    ),
  by = "day"
)

# Hourly subsets used by the plots further down.

# Hours with a heart rate above 0 and below 180.
active_hoursHR <- reldata[with(reldata, hr < 180 & hr > 0), ]
# Hours with a positive step count.
active_hoursSteps <- reldata[with(reldata, steps > 0), ]
# Hours with positive activity calories.
active_hoursCal <- reldata[with(reldata, cal > 0), ]
# All hours that fall on a Saturday or Sunday.
reldataWE <- reldata[with(reldata, weekday == "Sonntag" | weekday == "Samstag"), ]

Basic statistics

## [1] "Number of days used: 123"
## [1] "Average active hours per day: 14.8"
## [1] "Average activity calories per day: 451.7"
## [1] "Number of days I have not achieved activity goal of 400: 56 (46%)"
## [1] "Number of days I have not achieved 300: 31 (25%)"
## [1] "Number of days I overachieved my activity goal (2x400): 11"
## [1] "Most active day: 16.05.2015"
## [1] "Summary of HR:"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   46.00   65.00   74.00   80.32   92.00  167.00
## [1] "Summary of steps"
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    0.203   45.000  131.000  304.400  363.500 3568.000

Some Plots

# Number of hourly records in active_hoursSteps per weekday.
# `fill` must be mapped inside aes(); passed as a plain argument to ggplot()
# it has no effect and every bar keeps the default fill.
ggplot(data = active_hoursSteps, aes(x = weekday, fill = weekday)) +
  geom_bar() +
  labs(title = "Active hours per weekday")

# Hourly step counts stacked into one bar per weekday, split by month.
# The month mapping must live inside aes(); passed as a plain argument to
# ggplot() it has no effect. It is mapped to `fill` because `colour` only
# affects the outline of a bar, not its interior.
ggplot(data = active_hoursSteps, aes(x = weekday, y = steps, fill = month)) +
  geom_bar(stat = "identity") +
  labs(title = "Steps per weekday")

# Hourly heart-rate values over time (grey points) with a smoothed trend.
ggplot(active_hoursHR, aes(day, hr)) +
  geom_point(colour = "grey") +
  geom_smooth() +
  labs(title = "average HR")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

# Hourly step counts over time (grey points) with a smoothed trend.
ggplot(active_hoursSteps, aes(day, steps)) +
  geom_point(colour = "grey") +
  geom_smooth() +
  labs(title = "Average steps per hour")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

# Hourly activity calories over time (grey points) with a smoothed trend.
ggplot(active_hoursCal, aes(day, cal)) +
  geom_point(colour = "grey") +
  geom_smooth() +
  labs(title = "Average calories per hour")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

# Distribution of activity calories for each hour of the day, with the
# individual hourly values drawn on top of the boxes.
# The y axis is zoomed with coord_cartesian() instead of ylim(): ylim() sets
# scale limits, which removes every value outside 0-100 before the boxplot
# statistics are computed and therefore distorts the boxes.
ggplot(data = reldata, aes(x = factor(hour), y = cal)) +
  geom_boxplot(color = "red", fill = "lightgreen") +
  geom_point(color = "blue", alpha = .5) +
  coord_cartesian(ylim = c(0, 100)) +
  labs(title = "Activity calories per hour", x = "hour")

# Same per-hour calorie distribution, restricted to the weekend subset.
# The y axis is zoomed with coord_cartesian() instead of ylim(): ylim() sets
# scale limits, which removes every value outside 0-100 before the boxplot
# statistics are computed and therefore distorts the boxes.
ggplot(data = reldataWE, aes(x = factor(hour), y = cal)) +
  geom_boxplot(color = "red", fill = "lightgreen") +
  geom_point(color = "blue", alpha = .5) +
  coord_cartesian(ylim = c(0, 100)) +
  labs(title = "Activity calories per hour (weekend)", x = "hour")

# Daily step totals per weekday: boxplot with the individual days jittered
# on top. Written with ggplot() layers, like the other plots in this file,
# because qplot() is deprecated in current ggplot2 releases.
ggplot(data = reldata_day, aes(x = weekday, y = steps)) +
  geom_boxplot() +
  geom_jitter() +
  labs(title = "Steps per weekday", x = "weekday", y = "Steps")

# Daily activity-calorie totals per weekday: boxplot with the individual days
# jittered on top. Written with ggplot() layers, like the other plots in this
# file, because qplot() is deprecated in current ggplot2 releases.
ggplot(data = reldata_day, aes(x = weekday, y = cal)) +
  geom_boxplot() +
  geom_jitter() +
  labs(title = "Activity calories per weekday", x = "weekday", y = "Cal")

# Daily mean (grey points, black trend), maximum (red) and minimum (green)
# heart rate, each with its own smoothed trend.
# The y axis is zoomed with coord_cartesian() instead of ylim(): ylim() sets
# scale limits, which removes days whose value lies outside 50-150 before the
# smoothers are fitted and therefore changes the trend lines.
# Layers only override `y`; `x = day` (and `y = meanhr` for the first two
# layers) is inherited from the plot-level mapping.
ggplot(data = reldata_day, aes(x = day, y = meanhr)) +
  geom_point(color = "darkgrey") +
  stat_smooth(color = "black") +
  geom_point(aes(y = maxhr), color = "red") +
  stat_smooth(aes(y = maxhr), color = "red", fill = "red") +
  geom_point(aes(y = minhr), color = "green") +
  stat_smooth(aes(y = minhr), color = "green", fill = "green", alpha = .2) +
  ggtitle("HR comparison - max(red) average(black) min(green)") +
  coord_cartesian(ylim = c(50, 150))
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

# Number of active hours for each day, with a smoothed trend.
ggplot(reldata_day, aes(day, active)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Active hours per day")
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

# Daily activity calories with a smoothed trend and a red reference line at
# 400 (the activity goal quoted in the basic statistics).
# geom_hline() is the layer intended for a horizontal reference line; the
# previous geom_line(y = 400) overrode y on a path drawn through the data
# rows, so the line only covered the range of the observed days.
ggplot(data = reldata_day, aes(x = day, y = cal)) +
  geom_point() +
  stat_smooth() +
  geom_hline(yintercept = 400, color = "red") +
  ggtitle("Activity calories per day")
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.