Processing bike ride data from Fitotrack Android app

DATE: 2024-06-19

AUTHOR: John L. Godlee

I have been using The Fitotrack Android app[1] for a over a year now to track my bike rides. Fitotrack allows you to export the tracking data in XML format. I wrote an R script to process the XML data and create some basic summary plots. I have broken down the R script below:

1: https://github.com/russok/FitoTrack

Firstly, load necessary packages and import the compressed XML file, which has the file extension .ftb. I use Syncthing[2] to sync the backup files from my phone to my laptop.

2: https://syncthing.net/

# Process data from FitoTrack Android app
# John L. Godlee (johngodlee@gmail.com)
# Last updated: 2024-06-19

# Packages
library(dplyr)
library(XML)
library(lubridate)
library(ggplot2)
library(patchwork)
library(archive)
library(leaflet)
library(sf)

# Find all fitotrack backups
f <- list.files("~/syncthing/fitotrack", "*.ftb", full.names = TRUE)

# Check files found
stopifnot(length(f) > 0)

Then parse the file and extract each ride, represented by child nodes in the workouts part of the XML.

# 7z unarchive the file
conn <- archive_read(sort(f)[1])

# Import data
dat <- xmlParse(readLines(conn))

# Separate nodes with summary data
summ_nodes <- getNodeSet(dat, "//workouts//workouts")

Summarise each node and create a pretty dataframe, where each row is a ride.

# For each node, get children as list
summ_df <- bind_rows(lapply(seq_len(xmlSize(summ_nodes)), function(x) { 
  as.list(getChildrenStrings(summ_nodes[[x]]))
})) %>% 
  mutate(
    across(all_of(c("calorie", "ascent", "descent", 
          "avgPace", "avgSpeed", "topSpeed", 
          "length", "maxElevationMSL", "minElevationMSL")), as.numeric),
    start = as_datetime(as.numeric(start) / 1000),
    end = as_datetime(as.numeric(end) / 1000),
    duration = round(as.period(end - start)),
    pauseDuration = round(seconds_to_period(as.numeric(pauseDuration) / 1000)))

Create plots with summary information for each ride (plots_all), and a table with the same information (month_summ).

# Define conversion factor km to miles
kmt <- 0.6213711922 

# Plot average speed of all rides over time
avgSpeed_ts <- ggplot(summ_df, aes(x = start, y = avgSpeed)) + 
  geom_point(shape = 21) + 
  theme_bw() + 
  scale_y_continuous(
    name = expression("Average speed"~(km~h^-1)),
    sec.axis = sec_axis(
      transform = ~.*kmt, name = expression("Average speed"~(miles~h^-1)) )) + 
  xlab("Date")

# Plot top speed of all rides over time
topSpeed_ts <- ggplot(summ_df, aes(x = start, y = topSpeed)) + 
  geom_point(shape = 21) + 
  theme_bw() + 
  scale_y_continuous(
    name = expression("Top speed"~(km~h^-1)),
    sec.axis = sec_axis(
      transform = ~.*kmt, name = expression("Top speed"~(miles~h^-1)) )) + 
  xlab("Date")

# Plot length of all rides over time
length_ts <- summ_df %>% 
  mutate(length_km = length / 1000) %>% 
  ggplot(., aes(x = start, y = length_km)) + 
    geom_point(shape = 21) + 
    theme_bw() + 
    scale_y_continuous(
      name = "Distance (km)",
      sec.axis = sec_axis(
        transform = ~.*kmt, name = "Distance (miles)")) + 
    xlab("Date")

# Monthly breakdown of:
# total distance
# average speed
# top speed
month_summ <- summ_df %>% 
  mutate(month_year = format(as.Date(start), "%Y-%m")) %>% 
  group_by(month_year) %>% 
  summarise(
    total_dist = sum(length, na.rm = TRUE) / 1000,
    mean_speed = mean(avgSpeed, na.rm = TRUE),
    max_speed = max(topSpeed, na.rm = TRUE)) %>% 
  mutate(
    total_dist_miles = total_dist * kmt,
    mean_speed_mph = mean_speed * kmt,
    max_speed_mph = max_speed * kmt)

# Plot monthly total distance bar chart
month_dist <- month_summ %>% 
  mutate(month_year_date = as.Date(paste0(month_year, "-01"))) %>% 
  ggplot(., aes(x = month_year_date, y = total_dist)) + 
    geom_bar(stat = "identity", colour = "black", fill = "grey") + 
    theme_bw() + 
    scale_x_date(
      breaks = seq(
        as.Date(paste0(min(month_summ$month_year), "-01")), 
        as.Date(paste0(max(month_summ$month_year), "-01")),
        by = "month"),
      date_labels = "%b %Y") + 
    scale_y_continuous(
      name = "Total distance (km)",
      sec.axis = sec_axis(
        transform = ~.*kmt, name = "Total distance (miles)")) + 
    xlab("Month")

# Patchwork plots together
plots_all <- avgSpeed_ts + topSpeed_ts + length_ts + month_dist

Summary plots created by code above.

Monthly summary table cretaed by code above.

Now to process the data from a single ride. Fitotrack splits each ride up into interals which share a single ID, within the samples part of the XML.

First process each node and create a pretty dataframe.

# Get intervals
# Separate nodes
int_nodes <- getNodeSet(dat, "//samples//samples")

# For each node, get children as list
int_list <- lapply(seq_len(xmlSize(int_nodes)), function(x) { 
  as.list(getChildrenStrings(int_nodes[[x]]))
})

# Process intervals
# summ_list$samples[[1]]
int_df <- bind_rows(lapply(int_list, function(x) {
  data.frame(
    "int_id" = x$id,
    "id" = x$workoutId,
    "elevation" = as.numeric(x$elevation),
    "latitude" = as.numeric(x$lat),
    "longitude" = as.numeric(x$lon),
    "speed" = as.numeric(x$speed))
  })) %>% 
  group_by(id) %>% 
  arrange(int_id) %>% 
  mutate(int = row_number()) %>% 
  relocate(id, int) %>% 
  dplyr::select(-int_id) %>% 
  mutate(per = int / max(int))

# Check all interval IDs in summary dataframe
stopifnot(all(sort(unique(int_df$id)) %in% sort(unique(summ_df$id))))

Then extract a single ride ID, in this case the most recent ride, and create interval plots. The first is a speed plot, and the second is an elevation plot.

# Extract most recent ID
ex_id <- summ_df$id[order(summ_df$start, decreasing = TRUE)][1] 

# Create speed plot of a particular ride
int_speed <- int_df %>% 
  filter(id == ex_id) %>% 
  ggplot(., aes(x = int, y = speed)) + 
    geom_line() + 
    theme_bw() + 
    scale_y_continuous(
      name = expression("Speed"~(km~h^-1)),
      sec.axis = sec_axis(
        transform = ~.*kmt, name = expression("Speed"~(miles~h^-1)) )) + 
    xlab("Interval")

# Create elevation plot of a particular ride
int_elev <- int_df %>% 
  filter(id == ex_id) %>% 
  ggplot(., aes(x = int, y = elevation)) + 
    geom_line() + 
    theme_bw() + 
    labs(
      x = "Interval",
      y = "Elevation (m)")

# Combine speed and elevation plots for a particular ride
plots_ride <- (int_speed + int_elev) + 
  plot_layout(ncol = 1)

Plots of single ride generated by code above.

Finally, create a simple interactive map of the ride.

# Create sf object with interval points
int_sf <- int_fil %>% 
  st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) 

# Duplicate points to get start and end of interval, 
# add ID, summarise to interval lines
int_lines <- int_sf %>% 
  mutate(int = int - 1) %>% 
  bind_rows(., int_sf) %>% 
  arrange(int) %>% 
  group_by(int) %>% 
  summarise(
    elevation = mean(elevation),
    speed = mean(speed),
    per = mean(per),
    n = n(), 
    do_union = FALSE) %>% 
  filter(n > 1) %>% 
  st_cast(., "LINESTRING")

# Create colour palette
pal <- colorNumeric(palette = "plasma", domain = int_lines$speed)

# Create leaflet map call
lmap <- leaflet() %>%
  addTiles() %>% 
  setView(
    lng = mean(int_fil$longitude), 
    lat = mean(int_fil$latitude), 
    zoom = 12) %>% 
  addPolylines(
    data = int_lines, 
    color = pal(int_lines$speed), 
    opacity = 1)

Screenshot of leaflet map showing route with colouring by speed.

Update 2024-07-02

I recently added a kind of heatmap that plots all my rides using leaflet. It uses the leafgl[3] package to efficiently render many line segments using webGL. For ~140,000 line segments it took about 5 seconds to load the map and the map is pretty snappy in the browser once it is rendered.

3: https://github.com/r-spatial/leafgl

int_all_sf <- int_all %>% 
  st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) 

# Duplicate points to get start and end of interval, 
# add ID, summarise to interval lines
int_all_lines <- int_all_sf %>% 
  mutate(int = int - 1) %>% 
  bind_rows(., int_all_sf) %>% 
  arrange(int) %>% 
  group_by(id, int) %>% 
  summarise(
    elevation = mean(elevation),
    speed = mean(speed * 1/kmt),
    per = mean(per),
    n = n(), 
    do_union = FALSE,
    .groups = "keep") %>% 
  filter(n > 1) %>% 
  st_cast(., "LINESTRING")

# Create leaflet heatmap of all rides
heatmap_all <- leaflet(options = leafletOptions(perferCanvas = TRUE)) %>%
  addTiles() %>% 
  setView(
    lng = mean(int_all$longitude), 
    lat = mean(int_all$latitude), 
    zoom = 12) %>% 
  addGlPolylines(
    data = int_all_lines, 
    color = "#0000ff", 
    opacity = 0.05,
    src = TRUE,
    digits = 5) 

Screenshot of leaflet heatmap.