House Sales

Graphic by Carl Goodwin
library(tidyverse)
library(scales)
library(SPARQL)
library(lubridate)
library(rebus)
library(kableExtra)
library(wesanderson)
theme_set(theme_bw())

cols <- wes_palette(name = "Darjeeling1")

I wanted to explore how events have impacted house sales in London; particularly in my own postal district (SW10). In recent years there has been a series of increases in stamp duty plus the impact of the financial crisis.

And since I originally wrote this late in 2017, we’ve had the Brexit vote and departure, and more recently still, the impacts of Covid-19.

Let’s see how SW10 is weathering all this.

First I’m going to grab the raw data by running a SPARQL query within R. The query on house prices paid is provided by HM Land Registry Open Data.

endpoint <- "https://landregistry.data.gov.uk/landregistry/query"

query <- 'PREFIX  text: <http://jena.apache.org/text#>
PREFIX  ppd:  <http://landregistry.data.gov.uk/def/ppi/>
PREFIX  lrcommon: <http://landregistry.data.gov.uk/def/common/>
  
SELECT  ?item ?ppd_propertyAddress ?ppd_hasTransaction ?ppd_pricePaid ?ppd_transactionCategory ?ppd_transactionDate ?ppd_transactionId ?ppd_estateType ?ppd_newBuild ?ppd_propertyAddressCounty ?ppd_propertyAddressDistrict ?ppd_propertyAddressLocality ?ppd_propertyAddressPaon ?ppd_propertyAddressPostcode ?ppd_propertyAddressSaon ?ppd_propertyAddressStreet ?ppd_propertyAddressTown ?ppd_propertyType ?ppd_recordStatus

WHERE
{ ?ppd_propertyAddress text:query _:b0 .
  _:b0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> lrcommon:postcode .
  _:b0 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b1 .
  _:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> "( SW10 )" .
  _:b1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> _:b2 .
  _:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#first> 3000000 .
  _:b2 <http://www.w3.org/1999/02/22-rdf-syntax-ns#rest> <http://www.w3.org/1999/02/22-rdf-syntax-ns#nil> .
  ?item ppd:propertyAddress ?ppd_propertyAddress .
  ?item ppd:hasTransaction ?ppd_hasTransaction .
  ?item ppd:pricePaid ?ppd_pricePaid .
  ?item ppd:transactionCategory ?ppd_transactionCategory .
  ?item ppd:transactionDate ?ppd_transactionDate .
  ?item ppd:transactionId ?ppd_transactionId
  
  OPTIONAL { ?item ppd:estateType ?ppd_estateType }
  OPTIONAL { ?item ppd:newBuild ?ppd_newBuild }
  OPTIONAL { ?ppd_propertyAddress lrcommon:county ?ppd_propertyAddressCounty }
  OPTIONAL { ?ppd_propertyAddress lrcommon:district ?ppd_propertyAddressDistrict }
  OPTIONAL { ?ppd_propertyAddress lrcommon:locality ?ppd_propertyAddressLocality }
  OPTIONAL { ?ppd_propertyAddress lrcommon:paon ?ppd_propertyAddressPaon }
  OPTIONAL { ?ppd_propertyAddress lrcommon:postcode ?ppd_propertyAddressPostcode }
  OPTIONAL { ?ppd_propertyAddress lrcommon:saon ?ppd_propertyAddressSaon }
  OPTIONAL { ?ppd_propertyAddress lrcommon:street ?ppd_propertyAddressStreet }
  OPTIONAL { ?ppd_propertyAddress lrcommon:town ?ppd_propertyAddressTown }
  OPTIONAL { ?item ppd:propertyType ?ppd_propertyType }
  OPTIONAL { ?item ppd:recordStatus ?ppd_recordStatus }
}'

data_list <- SPARQL(endpoint, query)

The data come with an explanation of columns. And I want to filter out Additional Price Paid transactions, and exclude transfers under a power of sale/repossessions, buy-to-lets and transfers to non-private individuals.

data_tidy <- data_list$results %>%
  as_tibble() %>%
  mutate(
    date = as_datetime(ppd_transactionDate) %>% as_date(),
    amount = ppd_pricePaid / 1000000,
    prop_type = str_extract(ppd_propertyType, lookbehind("common/") %R% one_or_more(WRD)),
    est_type = str_extract(ppd_estateType, lookbehind("common/") %R% one_or_more(WRD)),
    cat = str_remove(ppd_transactionCategory, "<http://landregistry.data.gov.uk/def/ppi/"),
    prop_type = recode(prop_type, otherPropertyType = "Other")
  ) %>%
  filter(str_detect(cat, "standard"))

An article entitled Timeline: 20 years of stamp duty increases for home buyers provided many of the key dates with which to annotate my visualisation. I’ll create a little events tibble to capture these.

events <- tribble(
  ~date, ~change,
  "96-07-31", "Stamp Duty £250k (1.5%) £500k (2%)",
  "98-03-31", "£250k (2%) £500k (3%)",
  "99-03-31", "£250k (2.5%) £500k (3.5%)",
  "00-03-31", "£250k (3%) £500k (4%)",
  "11-04-30", "£250k (3%) £500k (4%) £1m (5%)",
  "12-03-31", "£250k (3%) £500k (4%) £1m (5%) £2m (7%)",
  "14-12-31", "£250k (5%) £925k (10%) 1.5m (12%)",
  "07-08-09", "Financial Crisis",
  "16-06-23", "Brexit Vote",
  "20-03-23", "Covid-19 Lockdown"
  # "20-07-08", "Stamp Duty Holiday",
) %>%
  mutate(date = ymd(date))

events %>% kable(col.names = c("Date", "Event"))
Date Event
1996-07-31 Stamp Duty £250k (1.5%) £500k (2%)
1998-03-31 £250k (2%) £500k (3%)
1999-03-31 £250k (2.5%) £500k (3.5%)
2000-03-31 £250k (3%) £500k (4%)
2011-04-30 £250k (3%) £500k (4%) £1m (5%)
2012-03-31 £250k (3%) £500k (4%) £1m (5%) £2m (7%)
2014-12-31 £250k (5%) £925k (10%) 1.5m (12%)
2007-08-09 Financial Crisis
2016-06-23 Brexit Vote
2020-03-23 Covid-19 Lockdown

I want to show each house sale transaction plotted against a backdrop of the key events. The trend lines reflect house prices for the two estate types: leasehold and freehold.

data_tidy %>%
  ggplot(aes(date, amount, colour = est_type)) +
  geom_point(alpha = 0.2, size = 0.7, show.legend = FALSE) +
  geom_smooth(se = FALSE, aes(linetype = est_type), size = 1.2) +
  labs(
    title = "SW10 Standard House Prices",
    subtitle = str_c(
      "Prices Paid to ", stamp("Jan 1, 2010")(max(data_tidy$date)),
      " (Prices > £5m Not Shown)"
    ),
    x = NULL,
    y = NULL,
    colour = "Type", linetype = "Type",
    caption = "Source: HM Land Registry"
  ) +
  geom_vline(xintercept = events$date, size = 0.5, lty = 2, alpha = 0.4) +
  annotate("text", events$date, 5,
    angle = 90,
    label = events$change, vjust = 1.4, hjust = 1, size = 3, fontface = 2
  ) +
  coord_cartesian(ylim = c(0, 5)) +
  scale_colour_manual(values = cols[c(2, 3)]) +
  scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
  scale_y_continuous(labels = dollar_format(accuracy = 1, prefix = "£", suffix = "m"))

Visually, it appears to me that the financial crisis had a big impact on sales volume with the Brexit vote sucking much of the remaining oxygen out of the market. Stamp duty increases in between probably slowed any intermediate recovery.

I’ll also take a look at the composition of SW10 both by estate type and property type.

data_tidy %>%
  mutate(
    est_type = str_to_title(est_type),
    prop_type = str_to_title(prop_type)
  ) %>%
  ggplot(aes(prop_type, fill = est_type)) +
  geom_bar(alpha = 0.8) +
  geom_label(
    stat = "count", colour = "white",
    aes(label = comma(..count.., accuracy = 1), y = ..count..), size = 3.5
  ) +
  facet_wrap(~est_type, ncol = 2) +
  scale_fill_manual(values = cols[c(2, 4)]) +
  labs(
    title = "SW10 Transactions by Estate & Property Types",
    subtitle = str_c("Transaction Count to ", stamp("Jan 1, 2010")(max(data_tidy$date))),
    x = NULL, y = NULL, caption = "Source: HM Land Registry"
  ) +
  theme(legend.position = "none")

Leasehold flats dominate.

In Digging Deep I will return to house sales and look at how they correlate with planning applications, e.g. for basements and extensions.

R Toolbox

Summarising below the packages and functions used in this post enables me to separately create a toolbox visualisation summarising the usage of packages and functions across all posts.

Package Function
base library[7]; c[2]; max[2]; conflicts[1]; cumsum[1]; function[1]; search[1]; sum[1]
dplyr mutate[7]; filter[5]; if_else[3]; as_tibble[2]; tibble[2]; arrange[1]; desc[1]; group_by[1]; recode[1]; select[1]; summarise[1]; tribble[1]
ggplot2 aes[4]; ggplot[2]; labs[2]; annotate[1]; coord_cartesian[1]; facet_wrap[1]; geom_bar[1]; geom_label[1]; geom_point[1]; geom_smooth[1]; geom_vline[1]; scale_colour_manual[1]; scale_fill_manual[1]; scale_x_date[1]; scale_y_continuous[1]; theme[1]; theme_bw[1]; theme_set[1]
kableExtra kable[1]; kable_material[1]; kbl[1]
lubridate stamp[2]; as_date[1]; as_datetime[1]; date[1]; ymd[1]
purrr map[1]; map2_dfr[1]; possibly[1]; set_names[1]
readr read_lines[1]
rebus literal[4]; lookahead[3]; lookbehind[3]; one_or_more[3]; whole_word[2]; WRD[2]; ALPHA[1]; or[1]
scales comma[1]; dollar_format[1]
SPARQL SPARQL[2]
stringr str_c[4]; str_detect[4]; str_remove[3]; str_extract[2]; str_to_title[2]; str_count[1]; str_remove_all[1]
tibble enframe[1]
tidyr unnest[1]
wesanderson wes_palette[1]
Carl Goodwin
Carl Goodwin
IBM Data Scientist & Growth Strategy Leader
comments powered by Disqus

Related