⌘+C
library(tidyverse)
Data visualization using ggplot2
Jihong
May 20, 2022
The NSF grant awards can be easily searched via NSF website. Check
Active Awards
and type “Bayesian & Cognitive Diagnosis” into the search box. Two things I’m really interested in: (1) Who as Principal Investigator (PI) obtained most NSF awards? (2) Which institute obtained most NSF awards?
pi_amount <- awards |>
select(PI = PrincipalInvestigator, Org = Organization, Amount = AwardedAmountToDate, Title) |>
filter(PI!="") |>
mutate(Org = str_replace_all(Org, pattern = " -DO NOT USE", "")) |>
mutate(Amount = str_replace_all(Amount, pattern = "[\\$\\,]", "")) |>
mutate(Amount = as.numeric(Amount)) |>
mutate(Title = str_replace(Title, " ", " ")) |>
mutate(Title = str_replace(Title, "NCRN-MN: ", "")) |>
mutate(Title = str_replace(Title, "Collaborative Research: ", "")) |>
mutate(Title = str_replace(Title, "CAREER: ", "")) |>
mutate(Title = sub(x = Title, pattern = '(?<=.{85})', replacement = '\n', perl = TRUE)) |>
mutate(Title = str_replace(Title, "^ ", "")) |>
mutate(PI = paste0(PI, "\n", Org))
pi_amount_p <- pi_amount |>
group_by(PI) |>
summarise(
Amount_sum = sum(Amount),
Award_num = n(),
Title = Title[1]
) |>
mutate(PI = fct_reorder(PI, Amount_sum)) |>
arrange(desc(PI)) |>
head(50) |> # top 100
mutate(Highlight = c(rep(1, 10), rep(0, 40)),
rank = row_number()) |>
mutate(labels = paste0(round(Amount_sum/ 1e6, 1), " M"))
ggplot(pi_amount_p) +
geom_col(aes(x = PI, y = Amount_sum, fill = factor(Highlight))) +
# geom_text(aes(x = PI, y = 0, label = labels), hjust = 1, size = 3) +
geom_text(aes(x = PI, y = 0, label = Title), hjust = 0, size = 3, alpha = 0.8) +
scale_y_continuous(labels = scales::label_number(suffix = " M", scale = 1e-6), limits = c(NA, 4500000)) +
# scale_x_discrete(labels = PI, breaks = PI) +
scale_fill_manual(values = c("darkblue", "red")) +
labs(title = "NSF Social Economic Sciences: Top 50 PIs most amount of awards\nfunded (2021-2022)",
subtitle = "Bayesian Cognitive Diagnosis",
caption = 'SEARCH: "Bayesian Cognitive Diagnosis"\nSource:https://nsf.gov/awardsearch',
x = "PI & Orgnization", y = "Total Amount of Awards") +
coord_flip() +
ggdark::dark_theme_gray() +
theme(legend.position = "", text = element_text(size = 10), title = element_text(hjust = -0.1))
---
# Documentation: https://wowchemy.com/docs/managing-content/
title: "NSF Grant on Bayesian Cognitive Diagnosis"
subtitle: "Data visualization using ggplot2"
summary: ""
authors: ["Jihong"]
tags: []
categories: []
date: 2022-05-20T12:30:38-05:00
lastmod: 2022-05-20T12:30:38-05:00
featured: false
draft: false
# Featured image
# To use, add an image named `featured.jpg/png` to your page's folder.
# Focal points: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight.
# Projects (optional).
# Associate this post with one or more of your projects.
# Simply enter your project's folder or file name without extension.
# E.g. `projects = ["internal-project"]` references `content/project/deep-learning/index.md`.
# Otherwise, set `projects = []`.
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")
```
> The NSF grant awards can be easily searched via [NSF website](https://nsf.gov/awardsearch). Check `Active Awards` and type "Bayesian & Cognitive Diagnosis" into the search box. Two things I'm really interested in: (1) Who as Principal Investigator (PI) obtained most NSF awards? (2) Which institute obtained most NSF awards?
```{r message=FALSE, warning=FALSE}
library(tidyverse)
```
```{r}
awards <- read.csv("Awards.csv")
awards <- awards |>
filter(NSFOrganization == "SES") # select social economic sciences
```
## Who get most awards
```{r fig.width=8, fig.height=20, message=FALSE, warning=FALSE}
pi_amount <- awards |>
select(PI = PrincipalInvestigator, Org = Organization, Amount = AwardedAmountToDate, Title) |>
filter(PI!="") |>
mutate(Org = str_replace_all(Org, pattern = " -DO NOT USE", "")) |>
mutate(Amount = str_replace_all(Amount, pattern = "[\\$\\,]", "")) |>
mutate(Amount = as.numeric(Amount)) |>
mutate(Title = str_replace(Title, " ", " ")) |>
mutate(Title = str_replace(Title, "NCRN-MN: ", "")) |>
mutate(Title = str_replace(Title, "Collaborative Research: ", "")) |>
mutate(Title = str_replace(Title, "CAREER: ", "")) |>
mutate(Title = sub(x = Title, pattern = '(?<=.{85})', replacement = '\n', perl = TRUE)) |>
mutate(Title = str_replace(Title, "^ ", "")) |>
mutate(PI = paste0(PI, "\n", Org))
pi_amount_p <- pi_amount |>
group_by(PI) |>
summarise(
Amount_sum = sum(Amount),
Award_num = n(),
Title = Title[1]
) |>
mutate(PI = fct_reorder(PI, Amount_sum)) |>
arrange(desc(PI)) |>
head(50) |> # top 100
mutate(Highlight = c(rep(1, 10), rep(0, 40)),
rank = row_number()) |>
mutate(labels = paste0(round(Amount_sum/ 1e6, 1), " M"))
ggplot(pi_amount_p) +
geom_col(aes(x = PI, y = Amount_sum, fill = factor(Highlight))) +
# geom_text(aes(x = PI, y = 0, label = labels), hjust = 1, size = 3) +
geom_text(aes(x = PI, y = 0, label = Title), hjust = 0, size = 3, alpha = 0.8) +
scale_y_continuous(labels = scales::label_number(suffix = " M", scale = 1e-6), limits = c(NA, 4500000)) +
# scale_x_discrete(labels = PI, breaks = PI) +
scale_fill_manual(values = c("darkblue", "red")) +
labs(title = "NSF Social Economic Sciences: Top 50 PIs most amount of awards\nfunded (2021-2022)",
subtitle = "Bayesian Cognitive Diagnosis",
caption = 'SEARCH: "Bayesian Cognitive Diagnosis"\nSource:https://nsf.gov/awardsearch',
x = "PI & Orgnization", y = "Total Amount of Awards") +
coord_flip() +
ggdark::dark_theme_gray() +
theme(legend.position = "", text = element_text(size = 10), title = element_text(hjust = -0.1))
```