Introduction to data analysis with R and Bioconductor: 一つにまとまったページ

Data Analysis and Visualisation in R for Ecologists

Error in loadNamespace(x): there is no package called 'Knitr'
種 ID、属、種、分類群
AB、Amphispiza、bilineata、鳥類
AH、Ammospermophilus、harrisi、げっ歯類、国勢調査されていない
AS、Ammodramus、savannarum、鳥類
BA、Baiomys、taylori、げっ歯類
Error in loadNamespace(x): there is no package called 'Knitr'
種 ID、属、種、分類群
"AB"、"Amphispiza"、"bilineata"、"Bird"
"AH"、"Ammospermophilus"、"harrisi"、"げっ歯類、国勢調査されていない"
"AS"、"Ammodramus" 、"サバンナルム"、"鳥"
"BA"、"バイオミス"、"テイロリ"、"げっ歯類"
bioc-intro/data/
          /fig_output/fig1.pdf
          /fig_output/fig2.png
バープロット
引数(lm)
クルスカル
## irisはRに付属するデータフレームの例であり、 head()はデータフレームの最初の部分を返す
## 関数である
dput(head(iris))
structure(list(Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4),
    Sepal.Width = c(3.5, 3, 3.2, 3.1, 3.6, 3.9), Petal.Length = c(1.4,
    1.4, 1.3, 1.5, 1.4, 1.7), Petal.Width = c(0.2, 0.2, 0.2,
    0.2, 0.2, 0.4), Species = structure(c(1L, 1L, 1L, 1L, 1L,
    1L), levels = c("setosa", "versicolor", "virginica"), class = "factor")), row.names = c(NA,
6L), class = "data.frame")
saveRDS(iris, file="/tmp/iris.rds")
some_data <- readRDS(file="~/Downloads/iris.rds")
sessionInfo()
R version 4.4.1 (2024-06-14)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C

time zone: Asia/Tokyo
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

loaded via a namespace (and not attached):
[1] compiler_4.4.1  tools_4.4.1     highr_0.11      knitr_1.48
[5] xfun_0.47       evaluate_0.24.0
library("ggplot2")
install.packages("dplyr")
install.packages("BiocManager")
BiocManager::install("SummarizedExperiment")
BiocManager::install("DESeq2")
3 + 5
[1] 8
12 / 7
[1] 1.714286
weight_kg <- 55
weight_kg <- 55    # 何も出力しません
(weight_kg <- 55)  #  しかし、呼び出しを括弧で囲むと `weight_kg` の値が出力され、
[1] 55
weight_kg          # オブジェクトの名前を入力しても同様に出力されます
[1] 55
2.2 * weight_kg
[1] 121
weight_kg <- 57.5
2.2 * weight_kg
[1] 126.5
weight_lb <- 2.2 * weight_kg
weight_kg <- 100
mass <- 47.5            # 質量?
age  <- 122             # 年齢?
mass <- mass * 2.0      # 質量?
age  <- age - 20        # 年齢?
mass_index <- mass/age  # 質量指数?
b <- sqrt(a)
round(3.14159)
[1] 3
args(round)
function (x, digits = 0, ...)
NULL
?round
round(3.14159, digits = 2)
[1] 3.14
round(3.14159, 2)
[1] 3.14
round(digits = 2, x = 3.14159)
[1] 3.14
weight_g <- c(50, 60, 65, 82)
weight_g
[1] 50 60 65 82
molecules <- c("dna", "rna", "protein")
molecules
[1] "dna"     "rna"     "protein"
length(weight_g)
[1] 4
length(molecules)
[1] 3
class(weight_g)
[1] "numeric"
class(molecules)
[1] "character"
str(weight_g)
 num [1:4] 50 60 65 82
str(molecules)
 chr [1:3] "dna" "rna" "protein"
weight_g <- c(weight_g, 90) # ベクトルの最後に追加
weight_g <- c(30, weight_g) # ベクトルの先頭に追加
weight_g
[1] 30 50 60 65 82 90
num_char <- c(1, 2, 3, "a")
num_logical <- c(1, 2, 3, TRUE, FALSE)
char_logical <- c("a", "b", "c", TRUE)
tricky <- c(1, 2, 3, "4")
class(num_char)
[1] "character"
num_char
[1] "1" "2" "3" "a"
class(num_logical)
[1] "numeric"
num_logical
[1] 1 2 3 1 0
class(char_logical)
[1] "character"
char_logical
[1] "a"    "b"    "c"    "TRUE"
class(tricky)
[1] "character"
tricky
[1] "1" "2" "3" "4"
num_logical <- c(1, 2, 3, TRUE)
char_logical <- c("a", "b", "c", TRUE)
combined_logical <- c(num_logical, char_logical)
combined_logical
[1] "1"    "2"    "3"    "1"    "a"    "b"    "c"    "TRUE"
molecules <- c("dna", "rna", "peptide", "protein")
molecules[2]
[1] "rna"
molecules[c(3, 2)]
[1] "peptide" "rna"    
more_molecules <- molecules[c(1, 2, 3, 2, 1, 4)]
more_molecules
[1] "dna"     "rna"     "peptide" "rna"     "dna"     "protein"
分子 ## すべての分子
Error in eval(expr, envir, enclos): object '分子' not found
分子[-1] ## 最初の分子を除くすべての分子
Error in eval(expr, envir, enclos): object '分子' not found
分子[-c(1, 3)] ## 1 番目/3 番目の分子を除くすべての分子
Error in eval(expr, envir, enclos): object '分子' not found
分子[c(-1, -3)] # # 1番目/3番目を除くすべて
Error in eval(expr, envir, enclos): object '分子' not found
weight_g <- c(21, 34, 39, 54, 55)
weight_g[c(TRUE, FALSE, TRUE, TRUE, FALSE)]
[1] 21 39 54
## will return logicals with TRUE for the indices that meet
## the condition
weight_g > 50
[1] FALSE FALSE FALSE  TRUE  TRUE
## so we can use this to select only the values above 50
weight_g[weight_g > 50]
[1] 54 55
weight_g[weight_g < 30 | weight_g > 50]
[1] 21 54 55
weight_g[weight_g >= 30 & weight_g == 21]
numeric(0)
分子 <- c("dna", "rna", "タンパク質", "ペプチド")
分子[分子 == "rna" |分子 == "dna"] # rna と dna の両方を返します
分子 %in% c("rna", "dna", "代謝物", "ペプチド", "グリセロール")
分子[分子 %in% c("rna", " 「DNA」、「代謝物」、「ペプチド」、「グリセロール」)]
Error: <text>:4:21: unexpected INCOMPLETE_STRING
3: 分子 %in% c("rna", "dna", "代謝物", "ペプチド", "グリセロール")
4: 分子[分子 %in% c("rna", " 「DNA」、「代謝物」、「ペプチド」、「グリセロール」)]
                       ^
"four" > "five"
[1] TRUE
x <- c(1, 5, 3, 5, 10)
names(x) ## 名前なし
NULL
names(x) <- c("A", "B", "C", "D", " E")
名前(x) ## これで名前が決まりました
Error in 名前(x): could not find function "名前"
x[c(1, 3)]
A C
1 3 
x[c("A", "C")]
A C
1 3 
heights <- c(2, 4, 4, NA, 6)
mean(heights)
[1] NA
max(heights)
[1] NA
mean(heights, na.rm = TRUE)
[1] 4
max(heights, na.rm = TRUE)
[1] 6
## 欠損値のない要素を抽出します。
heights[!is.na(heights)]
[1] 2 4 4 6
## 不完全なケースを削除したオブジェクトを返します。
## 返されるオブジェクトは、タイプ `"numeric"` の原子ベクトルです。
## (または `"double"`)。
na.omit(heights)
[1] 2 4 4 6
attr(,"na.action")
[1] 4
attr(,"class")
[1] "omit"
## 完全なケースである要素を抽出します。
## 返されるオブジェクトは、タイプ `"numeric"` の原子ベクトルです。
## (または `"double"`)。
の高さ[完全なケース(高さ)]
Error in eval(expr, envir, enclos): object 'の高さ' not found
高さ <- c(63, 69, 60, 65, NA, 68, 61, 70, 61, 59, 64, 69, 63, 63, NA, 72, 65, 64, 70, 63, 65)
heights_no_na <- heights[!is.na(heights)]
## または
heights_no_na <- na.omit(heights)
median(heights, na.rm = TRUE)
[1] 4
height_above_67 <- height_no_na[heights_no_na > 67]
Error in eval(expr, envir, enclos): object 'height_no_na' not found
長さ(heights_above_67)
Error in 長さ(heights_above_67): could not find function "長さ"
数値(3)
Error in 数値(3): could not find function "数値"
数値(10)
Error in 数値(10): could not find function "数値"
数値(0)
Error in 数値(0): could not find function "数値"
文字(2) ## 空の文字
Error in 文字(2): could not find function "文字"
論理的(2) ## FALSE
Error in 論理的(2): could not find function "論理的"
担当者(-1, 5)
Error in 担当者(-1, 5): could not find function "担当者"
担当者(NA, 5)
Error in 担当者(NA, 5): could not find function "担当者"
rep(c(1, 2, 3), 5)
 [1] 1 2 3 1 2 3 1 2 3 1 2 3 1 2 3
rep(c(1, 2, 3), each = 5)
 [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
sort(rep(c(1, 2, 3), 5))
 [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
seq(from = 1, to = 20, by = 2)
 [1]  1  3  5  7  9 11 13 15 17 19
seq(1, 5, 1)
[1] 1 2 3 4 5
seq(1, 5) ## default by
[1] 1 2 3 4 5
1:5
[1] 1 2 3 4 5
seq(from = 1, to = 20, length.out = 3)
[1]  1.0 10.5 20.0
sample(1:10)
 [1]  9  4  7  1  2  5  3 10  6  8
sample(letters, 5)
[1] "s" "a" "u" "x" "j"
sample(1:5, 10, replace = TRUE)
 [1] 2 1 5 5 1 1 5 5 2 2
sample(1:10)
 [1]  9  1  4  3  6  2  5  8 10  7
sample(1:10)
 [1]  4  9  7  6  1 10  8  3  2  5
set.seed(123)
sample(1:10)
 [1]  3 10  2  8  6  9  1  7  5  4
set.seed(123)
sample(1:10)
 [1]  3 10  2  8  6  9  1  7  5  4
set.seed(1)
sample(1:10)
 [1]  9  4  7  1  2  5  3 10  6  8
set.seed(1)
sample(1:10)
 [1]  9  4  7  1  2  5  3 10  6  8
rnorm(5)
[1]  0.69641761  0.05351568 -1.31028350 -2.12306606 -0.20807859
rnorm(5, 2, 2)
[1]  1.3744268 -0.1164714  2.8344472  1.3690969  3.6510983
rnorm(5, 100, 5)
[1] 106.45636  96.87448  95.62427 100.71678 107.12595
download.file(url = "https://github.com/carpentries-incubator/bioc-intro/raw/main/episodes/data/rnaseq.csv",
              destfile = "data/rnaseq.csv")
rna <- read.csv("data/rnaseq.csv")
RNA
head(rna)
     gene     sample expression     organism age    sex  infection  strain time
1     Asl GSM2545336       1170 Mus musculus   8 Female InfluenzaA C57BL/6    8
2    Apod GSM2545336      36194 Mus musculus   8 Female InfluenzaA C57BL/6    8
3 Cyp2d22 GSM2545336       4060 Mus musculus   8 Female InfluenzaA C57BL/6    8
4    Klk6 GSM2545336        287 Mus musculus   8 Female InfluenzaA C57BL/6    8
5   Fcrls GSM2545336         85 Mus musculus   8 Female InfluenzaA C57BL/6    8
6  Slc2a4 GSM2545336        782 Mus musculus   8 Female InfluenzaA C57BL/6    8
      tissue mouse ENTREZID
1 Cerebellum    14   109900
2 Cerebellum    14    11815
3 Cerebellum    14    56448
4 Cerebellum    14    19144
5 Cerebellum    14    80891
6 Cerebellum    14    20528
                                                                       product
1                               argininosuccinate lyase, transcript variant X1
2                                       apolipoprotein D, transcript variant 3
3 cytochrome P450, family 2, subfamily d, polypeptide 22, transcript variant 2
4                         kallikrein related-peptidase 6, transcript variant 2
5                Fc receptor-like S, scavenger receptor, transcript variant X1
6          solute carrier family 2 (facilitated glucose transporter), member 4
     ensembl_gene_id external_synonym chromosome_name   gene_biotype
1 ENSMUSG00000025533    2510006M18Rik               5 protein_coding
2 ENSMUSG00000022548             <NA>              16 protein_coding
3 ENSMUSG00000061740             2D22              15 protein_coding
4 ENSMUSG00000050063             Bssp               7 protein_coding
5 ENSMUSG00000015852    2810439C17Rik               3 protein_coding
6 ENSMUSG00000018566           Glut-4              11 protein_coding
                            phenotype_description
1           abnormal circulating amino acid level
2                      abnormal lipid homeostasis
3                        abnormal skin morphology
4                         abnormal cytokine level
5 decreased CD8-positive alpha-beta T cell number
6              abnormal circulating glucose level
  hsapiens_homolog_associated_gene_name
1                                   ASL
2                                  APOD
3                                CYP2D6
4                                  KLK6
5                                 FCRL2
6                                SLC2A4
##
## View(rna)も試してみる。
rna <- read.table(file = "data/rnaseq.csv",
                  sep = ",",
                  header = TRUE)
str(rna)
'data.frame':	32428 obs. of  19 variables:
 $ gene                                 : chr  "Asl" "Apod" "Cyp2d22" "Klk6" ...
 $ sample                               : chr  "GSM2545336" "GSM2545336" "GSM2545336" "GSM2545336" ...
 $ expression                           : int  1170 36194 4060 287 85 782 1619 288 43217 1071 ...
 $ organism                             : chr  "Mus musculus" "Mus musculus" "Mus musculus" "Mus musculus" ...
 $ age                                  : int  8 8 8 8 8 8 8 8 8 8 ...
 $ sex                                  : chr  "Female" "Female" "Female" "Female" ...
 $ infection                            : chr  "InfluenzaA" "InfluenzaA" "InfluenzaA" "InfluenzaA" ...
 $ strain                               : chr  "C57BL/6" "C57BL/6" "C57BL/6" "C57BL/6" ...
 $ time                                 : int  8 8 8 8 8 8 8 8 8 8 ...
 $ tissue                               : chr  "Cerebellum" "Cerebellum" "Cerebellum" "Cerebellum" ...
 $ mouse                                : int  14 14 14 14 14 14 14 14 14 14 ...
 $ ENTREZID                             : int  109900 11815 56448 19144 80891 20528 97827 118454 18823 14696 ...
 $ product                              : chr  "argininosuccinate lyase, transcript variant X1" "apolipoprotein D, transcript variant 3" "cytochrome P450, family 2, subfamily d, polypeptide 22, transcript variant 2" "kallikrein related-peptidase 6, transcript variant 2" ...
 $ ensembl_gene_id                      : chr  "ENSMUSG00000025533" "ENSMUSG00000022548" "ENSMUSG00000061740" "ENSMUSG00000050063" ...
 $ external_synonym                     : chr  "2510006M18Rik" NA "2D22" "Bssp" ...
 $ chromosome_name                      : chr  "5" "16" "15" "7" ...
 $ gene_biotype                         : chr  "protein_coding" "protein_coding" "protein_coding" "protein_coding" ...
 $ phenotype_description                : chr  "abnormal circulating amino acid level" "abnormal lipid homeostasis" "abnormal skin morphology" "abnormal cytokine level" ...
 $ hsapiens_homolog_associated_gene_name: chr  "ASL" "APOD" "CYP2D6" "KLK6" ...
# データフレームの1列目の最初の要素（ベクトルとして）
rna[1, 1]
# 6列目の最初の要素（ベクトルとして）
rna[1, 6]
# データフレームの1列目の要素（ベクトルとして）
rna[, 1]
# データフレームの1列目の要素（data.フレームとして）
rna[1]
# 7列目の最初の3要素（ベクトルとして）
rna[1:3, 7]
# データフレームの3行目（data.frameとして）
rna[3, ]
# head_rna <- head(rna)
head_rna <- rna[1:6, ]
head_rna
rna[, -1] ## 最初の列を除いたデータフレーム全体
rna[-c(7:66465), ] ## head(rna)と等価
rna["gene"] # Result is a data.frame
rna[, "gene"] # Result is a vector
rna[["gene"]].     # 結果はベクトル
rna$gene          # 結果はベクトル
## 1.
rna_200 <- rna[200, ]
## 2.
## Saving `n_rows` to improve readability and reduce duplication
n_rows <- nrow(rna)
rna_last <- rna[n_rows, ]
## 3.
rna_middle <- rna[n_rows / 2, ]
## 4.
rna_head <- rna[-(7:n_rows), ]
セックス <- factor(c("male", "female", "female", "male", "female"))
levels(sex)
Error in eval(expr, envir, enclos): object 'sex' not found
nlevels(sex)
Error in eval(expr, envir, enclos): object 'sex' not found
sex ## current order
Error in eval(expr, envir, enclos): object 'sex' not found
sex <- factor(sex, levels = c("male", "female"))
Error in eval(expr, envir, enclos): object 'sex' not found
sex ## after re-ordering
Error in eval(expr, envir, enclos): object 'sex' not found
プロット(性)
Error in プロット(性): could not find function "プロット"
as.character(性)
Error in eval(expr, envir, enclos): object '性' not found
levels(sex)
Error in eval(expr, envir, enclos): object 'sex' not found
levels(sex) <- c("M", "F")
Error: object 'sex' not found
sex
Error in eval(expr, envir, enclos): object 'sex' not found
plot(sex)
Error in eval(expr, envir, enclos): object 'sex' not found
levels(sex)
Error in eval(expr, envir, enclos): object 'sex' not found
levels(sex) <- c("Male", "Female")
Error: object 'sex' not found
animal_data <- data.frame(
       animal = c(dog, cat, sea cucumber, sea nurchin),
       feel = c("furry", "squishy", "spiny"),
       weight = c(45, 8 1.1, 0.8))
country_climate <- data.frame(
       country = c("Canada", "Panama", "South Africa", "Australia"),
       climate = c("cold", "hot", "temperate", "hot/temperate")、
       temperature = c(10, 30, 18, "15"),
       northern_hemisphere = c(TRUE, TRUE, FALSE, "FALSE"),
       has_kangaroo = c(FALSE, FALSE, FALSE, 1)
)
country_climate <- data.frame(
       country = c("Canada", "Panama", "South Africa", "Australia"),
       climate = c("cold", "hot", "temperate", "hot/temperate"),
       temperature = c(10, 30, 18, "15"),
       northern_hemisphere = c(TRUE, TRUE, FALSE, "FALSE"),
       has_kangaroo = c(FALSE, FALSE, FALSE, 1)
       )
str(country_climate)
'data.frame':	4 obs. of  5 variables:
 $ country            : chr  "Canada" "Panama" "South Africa" "Australia"
 $ climate            : chr  "cold" "hot" "temperate" "hot/temperate"
 $ temperature        : chr  "10" "30" "18" "15"
 $ northern_hemisphere: chr  "TRUE" "TRUE" "FALSE" "FALSE"
 $ has_kangaroo       : num  0 0 0 1
m <- matrix(1:9, ncol = 3, nrow = 3)
m
     [,1] [,2] [,3]
[1,]    1    4    7
[2,]    2    5    8
[3,]    3    6    9
##
ip <- installed.packages()
head(ip)
## View(ip)
## パッケージの数
nrow(ip)
## インストールされている全てのパッケージの名前
rownames(ip)
## 各パッケージに関する情報の種類
colnames(ip)
set.seed(123)
m <- matrix(rnorm(3000), ncol = 3)
dim(m)
[1] 1000    3
head(m)
            [,1]        [,2]       [,3]
[1,] -0.56047565 -0.99579872 -0.5116037
[2,] -0.23017749 -1.03995504  0.2369379
[3,]  1.55870831 -0.01798024 -0.5415892
[4,]  0.07050839 -0.13217513  1.2192276
[5,]  0.12928774 -2.54934277  0.1741359
[6,]  1.71506499  1.04057346 -0.6152683
library("lubridate")
my_date <- ymd("2015-01-01")
str(my_date)
 Date[1:1], format: "2015-01-01"
# sep は各コンポーネントを区切るために使う文字を示す
my_date <- ymd(paste("2015", "1", "1", sep = "-"))
str(my_date)
 Date[1:1], format: "2015-01-01"
x <- data.frame(year = c(1996, 1992, 1987, 1986, 2000, 1990, 2002, 1994, 1997, 1985),
                month = c(2, 3, 10, 1, 8, 3, 4, 5, 5),
                day = c(24, 8, 1, 5, 8, 17, 13, 10, 11, 24),
                value = c(4, 5, 1, 9, 3, 8, 10, 2, 6, 7))
Error in data.frame(year = c(1996, 1992, 1987, 1986, 2000, 1990, 2002, : arguments imply differing number of rows: 10, 9
x
Error in eval(expr, envir, enclos): object 'x' not found
paste(x$year, x$month, x$day, sep = "-")
Error in eval(expr, envir, enclos): object 'x' not found
ymd(paste(x$year, x$month, x$day, sep = "-"))
Error in eval(expr, envir, enclos): object 'x' not found
x$date <- ymd(paste(x$year, x$month, x$day, sep = "-"))
Error in eval(expr, envir, enclos): object 'x' not found
str(x) # '日付'をクラスとする新しいカラムに注目。
Error in eval(expr, envir, enclos): object 'x' not found
summary(x$date)
Error in eval(expr, envir, enclos): object 'x' not found
dmy(paste(x$day, x$month, x$year, sep = "-"))
Error in eval(expr, envir, enclos): object 'x' not found
l <- list(1:10, ## numeric
          letters, ## character
          installed.packages(), ## a matrix
          cars, ## a data.frame
          list(1, 2, 3)) ## a list
length(l)
[1] 5
str(l)
List of 5
 $ : int [1:10] 1 2 3 4 5 6 7 8 9 10
 $ : chr [1:26] "a" "b" "c" "d" ...
 $ : chr [1:187, 1:16] "askpass" "assertthat" "backports" "base64enc" ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:187] "askpass" "assertthat" "backports" "base64enc" ...
  .. ..$ : chr [1:16] "Package" "LibPath" "Version" "Priority" ...
 $ :'data.frame':	50 obs. of  2 variables:
  ..$ speed: num [1:50] 4 4 7 7 8 9 10 10 10 11 ...
  ..$ dist : num [1:50] 2 10 4 22 16 10 18 26 34 17 ...
 $ :List of 3
  ..$ : num 1
  ..$ : num 2
  ..$ : num 3
l[[1]]##
 [1]  1  2  3  4  5  6  7  8  9 10
l[1:2] ## 長さ 2 のリスト
[[1]]
 [1]  1  2  3  4  5  6  7  8  9 10

[[2]]
 [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
[20] "t" "u" "v" "w" "x" "y" "z"
l[1] ## 長さ 1 のリスト
[[1]]
 [1]  1  2  3  4  5  6  7  8  9 10
write.csv(rna, file = "data_output/my_rna.csv")
## dplyr を含む tidyverse パッケージをロード
library("tidyverse")
BiocManager::install("tidyverse")
rna <- read_csv("data/rnaseq.csv")

## データを見る
rna
# A tibble: 32,428 × 19
   gene    sample  expression organism   age sex   infection strain  time tissue
   <chr>   <chr>        <dbl> <chr>    <dbl> <chr> <chr>     <chr>  <dbl> <chr>
 1 Asl     GSM254…       1170 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 2 Apod    GSM254…      36194 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 3 Cyp2d22 GSM254…       4060 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 4 Klk6    GSM254…        287 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 5 Fcrls   GSM254…         85 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 6 Slc2a4  GSM254…        782 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 7 Exd2    GSM254…       1619 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 8 Gjc2    GSM254…        288 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 9 Plp1    GSM254…      43217 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
10 Gnb4    GSM254…       1071 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
# ℹ 32,418 more rows
# ℹ 9 more variables: mouse <dbl>, ENTREZID <dbl>, product <chr>,
#   ensembl_gene_id <chr>, external_synonym <chr>, chromosome_name <chr>,
#   gene_biotype <chr>, phenotype_description <chr>,
#   hsapiens_homolog_associated_gene_name <chr>
select(rna, gene, sample, tissue, expression)
# A tibble: 32,428 × 4
   gene    sample     tissue     expression
   <chr>   <chr>      <chr>           <dbl>
 1 Asl     GSM2545336 Cerebellum       1170
 2 Apod    GSM2545336 Cerebellum      36194
 3 Cyp2d22 GSM2545336 Cerebellum       4060
 4 Klk6    GSM2545336 Cerebellum        287
 5 Fcrls   GSM2545336 Cerebellum         85
 6 Slc2a4  GSM2545336 Cerebellum        782
 7 Exd2    GSM2545336 Cerebellum       1619
 8 Gjc2    GSM2545336 Cerebellum        288
 9 Plp1    GSM2545336 Cerebellum      43217
10 Gnb4    GSM2545336 Cerebellum       1071
# ℹ 32,418 more rows
select(rna, -tissue, -organism)
# A tibble: 32,428 × 17
   gene    sample   expression   age sex   infection strain  time mouse ENTREZID
   <chr>   <chr>         <dbl> <dbl> <chr> <chr>     <chr>  <dbl> <dbl>    <dbl>
 1 Asl     GSM2545…       1170     8 Fema… Influenz… C57BL…     8    14   109900
 2 Apod    GSM2545…      36194     8 Fema… Influenz… C57BL…     8    14    11815
 3 Cyp2d22 GSM2545…       4060     8 Fema… Influenz… C57BL…     8    14    56448
 4 Klk6    GSM2545…        287     8 Fema… Influenz… C57BL…     8    14    19144
 5 Fcrls   GSM2545…         85     8 Fema… Influenz… C57BL…     8    14    80891
 6 Slc2a4  GSM2545…        782     8 Fema… Influenz… C57BL…     8    14    20528
 7 Exd2    GSM2545…       1619     8 Fema… Influenz… C57BL…     8    14    97827
 8 Gjc2    GSM2545…        288     8 Fema… Influenz… C57BL…     8    14   118454
 9 Plp1    GSM2545…      43217     8 Fema… Influenz… C57BL…     8    14    18823
10 Gnb4    GSM2545…       1071     8 Fema… Influenz… C57BL…     8    14    14696
# ℹ 32,418 more rows
# ℹ 7 more variables: product <chr>, ensembl_gene_id <chr>,
#   external_synonym <chr>, chromosome_name <chr>, gene_biotype <chr>,
#   phenotype_description <chr>, hsapiens_homolog_associated_gene_name <chr>
filter(rna, sex == "Male")
# A tibble: 14,740 × 19
   gene    sample  expression organism   age sex   infection strain  time tissue
   <chr>   <chr>        <dbl> <chr>    <dbl> <chr> <chr>     <chr>  <dbl> <chr>
 1 Asl     GSM254…        626 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 2 Apod    GSM254…      13021 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 3 Cyp2d22 GSM254…       2171 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 4 Klk6    GSM254…        448 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 5 Fcrls   GSM254…        180 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 6 Slc2a4  GSM254…        313 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 7 Exd2    GSM254…       2366 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 8 Gjc2    GSM254…        310 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 9 Plp1    GSM254…      53126 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
10 Gnb4    GSM254…       1355 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
# ℹ 14,730 more rows
# ℹ 9 more variables: mouse <dbl>, ENTREZID <dbl>, product <chr>,
#   ensembl_gene_id <chr>, external_synonym <chr>, chromosome_name <chr>,
#   gene_biotype <chr>, phenotype_description <chr>,
#   hsapiens_homolog_associated_gene_name <chr>
filter(rna, sex == "Male" & infection == "NonInfected")
# A tibble: 4,422 × 19
   gene    sample  expression organism   age sex   infection strain  time tissue
   <chr>   <chr>        <dbl> <chr>    <dbl> <chr> <chr>     <chr>  <dbl> <chr>
 1 Asl     GSM254…        535 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 2 Apod    GSM254…      13668 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 3 Cyp2d22 GSM254…       2008 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 4 Klk6    GSM254…       1101 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 5 Fcrls   GSM254…        375 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 6 Slc2a4  GSM254…        249 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 7 Exd2    GSM254…       3126 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 8 Gjc2    GSM254…        791 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 9 Plp1    GSM254…      98658 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
10 Gnb4    GSM254…       2437 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
# ℹ 4,412 more rows
# ℹ 9 more variables: mouse <dbl>, ENTREZID <dbl>, product <chr>,
#   ensembl_gene_id <chr>, external_synonym <chr>, chromosome_name <chr>,
#   gene_biotype <chr>, phenotype_description <chr>,
#   hsapiens_homolog_associated_gene_name <chr>
genes <- select(rna, gene, hsapiens_homolog_associated_gene_name)
genes
# A tibble: 32,428 × 2
   gene    hsapiens_homolog_associated_gene_name
   <chr>   <chr>
 1 Asl     ASL
 2 Apod    APOD
 3 Cyp2d22 CYP2D6
 4 Klk6    KLK6
 5 Fcrls   FCRL2
 6 Slc2a4  SLC2A4
 7 Exd2    EXD2
 8 Gjc2    GJC2
 9 Plp1    PLP1
10 Gnb4    GNB4
# ℹ 32,418 more rows
filter(genes, is.na(hsapiens_homolog_associated_gene_name))
# A tibble: 4,290 × 2
   gene     hsapiens_homolog_associated_gene_name
   <chr>    <chr>
 1 Prodh    <NA>
 2 Tssk5    <NA>
 3 Vmn2r1   <NA>
 4 Gm10654  <NA>
 5 Hexa     <NA>
 6 Sult1a1  <NA>
 7 Gm6277   <NA>
 8 Tmem198b <NA>
 9 Adam1a   <NA>
10 Ebp      <NA>
# ℹ 4,280 more rows
filter(genes, !is.na(hsapiens_homolog_associated_gene_name))
# A tibble: 28,138 × 2
   gene    hsapiens_homolog_associated_gene_name
   <chr>   <chr>
 1 Asl     ASL
 2 Apod    APOD
 3 Cyp2d22 CYP2D6
 4 Klk6    KLK6
 5 Fcrls   FCRL2
 6 Slc2a4  SLC2A4
 7 Exd2    EXD2
 8 Gjc2    GJC2
 9 Plp1    PLP1
10 Gnb4    GNB4
# ℹ 28,128 more rows
rna2 <- filter(rna, sex == "Male")
rna3 <- select(rna2, gene, sample, tissue, expression)
rna3
# A tibble: 14,740 × 4
   gene    sample     tissue     expression
   <chr>   <chr>      <chr>           <dbl>
 1 Asl     GSM2545340 Cerebellum        626
 2 Apod    GSM2545340 Cerebellum      13021
 3 Cyp2d22 GSM2545340 Cerebellum       2171
 4 Klk6    GSM2545340 Cerebellum        448
 5 Fcrls   GSM2545340 Cerebellum        180
 6 Slc2a4  GSM2545340 Cerebellum        313
 7 Exd2    GSM2545340 Cerebellum       2366
 8 Gjc2    GSM2545340 Cerebellum        310
 9 Plp1    GSM2545340 Cerebellum      53126
10 Gnb4    GSM2545340 Cerebellum       1355
# ℹ 14,730 more rows
rna3 <- select(filter(rna, sex == "Male", gene, sample, tissue, expression))
Error in `filter()`:
ℹ In argument: `gene`.
Caused by error:
! `..2` must be a logical vector, not a character vector.
rna3
# A tibble: 14,740 × 4
   gene    sample     tissue     expression
   <chr>   <chr>      <chr>           <dbl>
 1 Asl     GSM2545340 Cerebellum        626
 2 Apod    GSM2545340 Cerebellum      13021
 3 Cyp2d22 GSM2545340 Cerebellum       2171
 4 Klk6    GSM2545340 Cerebellum        448
 5 Fcrls   GSM2545340 Cerebellum        180
 6 Slc2a4  GSM2545340 Cerebellum        313
 7 Exd2    GSM2545340 Cerebellum       2366
 8 Gjc2    GSM2545340 Cerebellum        310
 9 Plp1    GSM2545340 Cerebellum      53126
10 Gnb4    GSM2545340 Cerebellum       1355
# ℹ 14,730 more rows
rna %>%
  filter(sex == "Male") %>%
  select(gene, sample, tissue, expression)
# A tibble: 14,740 × 4
   gene    sample     tissue     expression
   <chr>   <chr>      <chr>           <dbl>
 1 Asl     GSM2545340 Cerebellum        626
 2 Apod    GSM2545340 Cerebellum      13021
 3 Cyp2d22 GSM2545340 Cerebellum       2171
 4 Klk6    GSM2545340 Cerebellum        448
 5 Fcrls   GSM2545340 Cerebellum        180
 6 Slc2a4  GSM2545340 Cerebellum        313
 7 Exd2    GSM2545340 Cerebellum       2366
 8 Gjc2    GSM2545340 Cerebellum        310
 9 Plp1    GSM2545340 Cerebellum      53126
10 Gnb4    GSM2545340 Cerebellum       1355
# ℹ 14,730 more rows
rna3<- rna %>%
  filter(sex == "Male") %>%
  select(gene, sample, tissue, expression)

rna3
# A tibble: 14,740 × 4
   gene    sample     tissue     expression
   <chr>   <chr>      <chr>           <dbl>
 1 Asl     GSM2545340 Cerebellum        626
 2 Apod    GSM2545340 Cerebellum      13021
 3 Cyp2d22 GSM2545340 Cerebellum       2171
 4 Klk6    GSM2545340 Cerebellum        448
 5 Fcrls   GSM2545340 Cerebellum        180
 6 Slc2a4  GSM2545340 Cerebellum        313
 7 Exd2    GSM2545340 Cerebellum       2366
 8 Gjc2    GSM2545340 Cerebellum        310
 9 Plp1    GSM2545340 Cerebellum      53126
10 Gnb4    GSM2545340 Cerebellum       1355
# ℹ 14,730 more rows
rna %>%
  filter(expression > 50000,
         sex == "Female",
         time == 0 ) %>%
  select(gene, sample, time, expression, age)
# A tibble: 9 × 5
  gene   sample      time expression   age
  <chr>  <chr>      <dbl>      <dbl> <dbl>
1 Plp1   GSM2545337     0     101241     8
2 Atp1b1 GSM2545337     0      53260     8
3 Plp1   GSM2545338     0      96534     8
4 Atp1b1 GSM2545338     0      50614     8
5 Plp1   GSM2545348     0     102790     8
6 Atp1b1 GSM2545348     0      59544     8
7 Plp1   GSM2545353     0      71237     8
8 Glul   GSM2545353     0      52451     8
9 Atp1b1 GSM2545353     0      61451     8
rna %>%
  mutate(time_hours = time * 24) %>%
  select(time, time_hours)
# A tibble: 32,428 × 2
    time time_hours
   <dbl>      <dbl>
 1     8        192
 2     8        192
 3     8        192
 4     8        192
 5     8        192
 6     8        192
 7     8        192
 8     8        192
 9     8        192
10     8        192
# ℹ 32,418 more rows
rna %>%
  mutate(time_hours = time * 24,
         time_mn = time_hours * 60) %>%
  select(time, time_hours, time_mn)
# A tibble: 32,428 × 3
    time time_hours time_mn
   <dbl>      <dbl>   <dbl>
 1     8        192   11520
 2     8        192   11520
 3     8        192   11520
 4     8        192   11520
 5     8        192   11520
 6     8        192   11520
 7     8        192   11520
 8     8        192   11520
 9     8        192   11520
10     8        192   11520
# ℹ 32,418 more rows
rna %>%
  mutate(expression = log(expression)) %>%
  select(gene, chromosome_name, phenotype_description, sample, expression) %>%
  filter(chromosome_name == "X" | chromosome_name == "Y") %>%
  filter(!is.na(phenotype_description)) %>%
  filter(expression > 5)
# A tibble: 649 × 5
   gene   chromosome_name phenotype_description                sample expression
   <chr>  <chr>           <chr>                                <chr>       <dbl>
 1 Plp1   X               abnormal CNS glial cell morphology   GSM25…      10.7
 2 Slc7a3 X               decreased body length                GSM25…       5.46
 3 Plxnb3 X               abnormal coat appearance             GSM25…       6.58
 4 Rbm3   X               abnormal liver morphology            GSM25…       9.32
 5 Cfp    X               abnormal cardiovascular system phys… GSM25…       6.18
 6 Ebp    X               abnormal embryonic erythrocyte morp… GSM25…       6.68
 7 Cd99l2 X               abnormal cellular extravasation      GSM25…       8.04
 8 Piga   X               abnormal brain development           GSM25…       6.06
 9 Pim2   X               decreased T cell proliferation       GSM25…       7.11
10 Itm2a  X               no abnormal phenotype detected       GSM25…       7.48
# ℹ 639 more rows
rna %>%
  group_by(gene)
# A tibble: 32,428 × 19
# Groups:   gene [1,474]
   gene    sample  expression organism   age sex   infection strain  time tissue
   <chr>   <chr>        <dbl> <chr>    <dbl> <chr> <chr>     <chr>  <dbl> <chr>
 1 Asl     GSM254…       1170 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 2 Apod    GSM254…      36194 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 3 Cyp2d22 GSM254…       4060 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 4 Klk6    GSM254…        287 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 5 Fcrls   GSM254…         85 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 6 Slc2a4  GSM254…        782 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 7 Exd2    GSM254…       1619 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 8 Gjc2    GSM254…        288 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 9 Plp1    GSM254…      43217 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
10 Gnb4    GSM254…       1071 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
# ℹ 32,418 more rows
# ℹ 9 more variables: mouse <dbl>, ENTREZID <dbl>, product <chr>,
#   ensembl_gene_id <chr>, external_synonym <chr>, chromosome_name <chr>,
#   gene_biotype <chr>, phenotype_description <chr>,
#   hsapiens_homolog_associated_gene_name <chr>
rna %>%
  group_by(sample)
# A tibble: 32,428 × 19
# Groups:   sample [22]
   gene    sample  expression organism   age sex   infection strain  time tissue
   <chr>   <chr>        <dbl> <chr>    <dbl> <chr> <chr>     <chr>  <dbl> <chr>
 1 Asl     GSM254…       1170 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 2 Apod    GSM254…      36194 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 3 Cyp2d22 GSM254…       4060 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 4 Klk6    GSM254…        287 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 5 Fcrls   GSM254…         85 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 6 Slc2a4  GSM254…        782 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 7 Exd2    GSM254…       1619 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 8 Gjc2    GSM254…        288 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 9 Plp1    GSM254…      43217 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
10 Gnb4    GSM254…       1071 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
# ℹ 32,418 more rows
# ℹ 9 more variables: mouse <dbl>, ENTREZID <dbl>, product <chr>,
#   ensembl_gene_id <chr>, external_synonym <chr>, chromosome_name <chr>,
#   gene_biotype <chr>, phenotype_description <chr>,
#   hsapiens_homolog_associated_gene_name <chr>
rna %>%
  group_by(gene %>%
  summarise(mean_expression = mean(expression))
Error: <text>:4:0: unexpected end of input
2:   group_by(gene %>%
3:   summarise(mean_expression = mean(expression))
  ^
rna %>%
  group_by(sample %>%
  summarise(mean_expression = mean(expression))
Error: <text>:4:0: unexpected end of input
2:   group_by(sample %>%
3:   summarise(mean_expression = mean(expression))
  ^
rna %>%
  group_by(gene, infection, time) %>%
  summarise(mean_expression = mean(expression))
`summarise()` has grouped output by 'gene', 'infection'. You can override using
the `.groups` argument.
# A tibble: 4,422 × 4
# Groups:   gene, infection [2,948]
   gene     infection    time mean_expression
   <chr>    <chr>       <dbl>           <dbl>
 1 AI504432 InfluenzaA      4           1104.
 2 AI504432 InfluenzaA      8           1014
 3 AI504432 NonInfected     0           1034.
 4 AW046200 InfluenzaA      4            152.
 5 AW046200 InfluenzaA      8             81
 6 AW046200 NonInfected     0            155.
 7 AW551984 InfluenzaA      4            302.
 8 AW551984 InfluenzaA      8            342.
 9 AW551984 NonInfected     0            238
10 Aamp     InfluenzaA      4           4870
# ℹ 4,412 more rows
rna %>%
  group_by(gene, infection, time) %>%
  summarise(mean_expression = mean(expression),
            median_expression = median(expression))
`summarise()` has grouped output by 'gene', 'infection'. You can override using
the `.groups` argument.
# A tibble: 4,422 × 5
# Groups:   gene, infection [2,948]
   gene     infection    time mean_expression median_expression
   <chr>    <chr>       <dbl>           <dbl>             <dbl>
 1 AI504432 InfluenzaA      4           1104.             1094.
 2 AI504432 InfluenzaA      8           1014               985
 3 AI504432 NonInfected     0           1034.             1016
 4 AW046200 InfluenzaA      4            152.              144.
 5 AW046200 InfluenzaA      8             81                82
 6 AW046200 NonInfected     0            155.              163
 7 AW551984 InfluenzaA      4            302.              245
 8 AW551984 InfluenzaA      8            342.              287
 9 AW551984 NonInfected     0            238               265
10 Aamp     InfluenzaA      4           4870              4708
# ℹ 4,412 more rows
rna %>%
  filter(gene == "Dok3") %>%
  group_by(time) %>%
  summarise(mean = mean(expression))
# A tibble: 3 × 2
   time  mean
  <dbl> <dbl>
1     0  169
2     4  156.
3     8   61 
rna %>%
    count(infection)
# A tibble: 2 × 2
  infection       n
  <chr>       <int>
1 InfluenzaA  22110
2 NonInfected 10318
rna %>%
    group_by(infection) %>%
    summarise(n = n())
# A tibble: 2 × 2
  infection       n
  <chr>       <int>
1 InfluenzaA  22110
2 NonInfected 10318
rna %>%
    count(infection, time)
# A tibble: 3 × 3
  infection    time     n
  <chr>       <dbl> <int>
1 InfluenzaA      4 11792
2 InfluenzaA      8 10318
3 NonInfected     0 10318
rna %>%
  group_by(infection, time) %>%
  summarise(n = n())
`summarise()` has grouped output by 'infection'. You can override using the
`.groups` argument.
# A tibble: 3 × 3
# Groups:   infection [2]
  infection    time     n
  <chr>       <dbl> <int>
1 InfluenzaA      4 11792
2 InfluenzaA      8 10318
3 NonInfected     0 10318
rna %>%
  count(infection, time) %>%
  arrange(time)
# A tibble: 3 × 3
  infection    time     n
  <chr>       <dbl> <int>
1 NonInfected     0 10318
2 InfluenzaA      4 11792
3 InfluenzaA      8 10318
rna %>%
  count(infection, time) %>%
  arrange(n)
# A tibble: 3 × 3
  infection    time     n
  <chr>       <dbl> <int>
1 InfluenzaA      8 10318
2 NonInfected     0 10318
3 InfluenzaA      4 11792
rna %>%
  count(infection, time) %>%
  arrange(desc(n))
# A tibble: 3 × 3
  infection    time     n
  <chr>       <dbl> <int>
1 InfluenzaA      4 11792
2 InfluenzaA      8 10318
3 NonInfected     0 10318
##
rna %>%
  count(sample)
## 2.
rna %>%
  group_by(sample) %>%
  summary(seq_depth = sum(expression)) %>%
  arrange(desc(seq_depth))
## 3.
rna %>%
  filter(sample == "GSM2545336") %>%
  count(gene_biotype) %>%
  arrange(desc(n))
## 4.
rna %>%
  filter(phenotype_description == "abnormal DNA methylation") %>%
  group_by(gene, time) %>%
  summary(mean_expression = mean(log(expression)) %>%
  arrange()
Error: <text>:20:0: unexpected end of input
18:   summary(mean_expression = mean(log(expression)) %>%
19:   arrange()
   ^
rna %>%
  arrange(gene)
# A tibble: 32,428 × 19
   gene     sample expression organism   age sex   infection strain  time tissue
   <chr>    <chr>       <dbl> <chr>    <dbl> <chr> <chr>     <chr>  <dbl> <chr>
 1 AI504432 GSM25…       1230 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 2 AI504432 GSM25…       1085 Mus mus…     8 Fema… NonInfec… C57BL…     0 Cereb…
 3 AI504432 GSM25…        969 Mus mus…     8 Fema… NonInfec… C57BL…     0 Cereb…
 4 AI504432 GSM25…       1284 Mus mus…     8 Fema… Influenz… C57BL…     4 Cereb…
 5 AI504432 GSM25…        966 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
 6 AI504432 GSM25…        918 Mus mus…     8 Male  Influenz… C57BL…     8 Cereb…
 7 AI504432 GSM25…        985 Mus mus…     8 Fema… Influenz… C57BL…     8 Cereb…
 8 AI504432 GSM25…        972 Mus mus…     8 Male  NonInfec… C57BL…     0 Cereb…
 9 AI504432 GSM25…       1000 Mus mus…     8 Fema… Influenz… C57BL…     4 Cereb…
10 AI504432 GSM25…        816 Mus mus…     8 Male  Influenz… C57BL…     4 Cereb…
# ℹ 32,418 more rows
# ℹ 9 more variables: mouse <dbl>, ENTREZID <dbl>, product <chr>,
#   ensembl_gene_id <chr>, external_synonym <chr>, chromosome_name <chr>,
#   gene_biotype <chr>, phenotype_description <chr>,
#   hsapiens_homolog_associated_gene_name <chr>
# A tibble: 1,474 × 23
   gene    GSM2545336 GSM2545337 GSM2545338 GSM2545339 GSM2545340 GSM2545341
   <chr>        <dbl>      <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
 1 Asl           1170        361        400        586        626        988
 2 Apod         36194      10347       9173      10620      13021      29594
 3 Cyp2d22       4060       1616       1603       1901       2171       3349
 4 Klk6           287        629        641        578        448        195
 5 Fcrls           85        233        244        237        180         38
 6 Slc2a4         782        231        248        265        313        786
 7 Exd2          1619       2288       2235       2513       2366       1359
 8 Gjc2           288        595        568        551        310        146
 9 Plp1         43217     101241      96534      58354      53126      27173
10 Gnb4          1071       1791       1867       1430       1355        798
# ℹ 1,464 more rows
# ℹ 16 more variables: GSM2545342 <dbl>, GSM2545343 <dbl>, GSM2545344 <dbl>,
#   GSM2545345 <dbl>, GSM2545346 <dbl>, GSM2545347 <dbl>, GSM2545348 <dbl>,
#   GSM2545349 <dbl>, GSM2545350 <dbl>, GSM2545351 <dbl>, GSM2545352 <dbl>,
#   GSM2545353 <dbl>, GSM2545354 <dbl>, GSM2545362 <dbl>, GSM2545363 <dbl>,
#   GSM2545380 <dbl>
rna_exp <- rna %>%
  select(gene, sample, expression)
rna_exp
# A tibble: 32,428 × 3
   gene    sample     expression
   <chr>   <chr>           <dbl>
 1 Asl     GSM2545336       1170
 2 Apod    GSM2545336      36194
 3 Cyp2d22 GSM2545336       4060
 4 Klk6    GSM2545336        287
 5 Fcrls   GSM2545336         85
 6 Slc2a4  GSM2545336        782
 7 Exd2    GSM2545336       1619
 8 Gjc2    GSM2545336        288
 9 Plp1    GSM2545336      43217
10 Gnb4    GSM2545336       1071
# ℹ 32,418 more rows
rna_wide <- rna_exp %>%
  pivot_wider(names_from = sample,
              values_from = expression)
rna_wide
# A tibble: 1,474 × 23
   gene    GSM2545336 GSM2545337 GSM2545338 GSM2545339 GSM2545340 GSM2545341
   <chr>        <dbl>      <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
 1 Asl           1170        361        400        586        626        988
 2 Apod         36194      10347       9173      10620      13021      29594
 3 Cyp2d22       4060       1616       1603       1901       2171       3349
 4 Klk6           287        629        641        578        448        195
 5 Fcrls           85        233        244        237        180         38
 6 Slc2a4         782        231        248        265        313        786
 7 Exd2          1619       2288       2235       2513       2366       1359
 8 Gjc2           288        595        568        551        310        146
 9 Plp1         43217     101241      96534      58354      53126      27173
10 Gnb4          1071       1791       1867       1430       1355        798
# ℹ 1,464 more rows
# ℹ 16 more variables: GSM2545342 <dbl>, GSM2545343 <dbl>, GSM2545344 <dbl>,
#   GSM2545345 <dbl>, GSM2545346 <dbl>, GSM2545347 <dbl>, GSM2545348 <dbl>,
#   GSM2545349 <dbl>, GSM2545350 <dbl>, GSM2545351 <dbl>, GSM2545352 <dbl>,
#   GSM2545353 <dbl>, GSM2545354 <dbl>, GSM2545362 <dbl>, GSM2545363 <dbl>,
#   GSM2545380 <dbl>
rna_with_missing_values <- rna %>%
  select(gene, sample, expression) %>%
  filter(gene %in% c("Asl", "Apod", "Cyp2d22")) %>%
  filter(sample %in% c("GSM2545336", "GSM2545337", "GSM2545338")) %>%
  arrange(sample) %>%
  filter(!(gene == "Cyp2d22" & sample != "GSM2545338"))
rna_with_missing_values
# A tibble: 7 × 3
  gene    sample     expression
  <chr>   <chr>           <dbl>
1 Asl     GSM2545336       1170
2 Apod    GSM2545336      36194
3 Asl     GSM2545337        361
4 Apod    GSM2545337      10347
5 Asl     GSM2545338        400
6 Apod    GSM2545338       9173
7 Cyp2d22 GSM2545338       1603
rna_with_missing_values %>%
  pivot_wider(names_from = sample,
              values_from = expression)
# A tibble: 3 × 4
  gene    GSM2545336 GSM2545337 GSM2545338
  <chr>        <dbl>      <dbl>      <dbl>
1 Asl           1170        361        400
2 Apod         36194      10347       9173
3 Cyp2d22         NA         NA       1603
rna_with_missing_values %>%
  pivot_wider(names_from = sample,
              values_from = expression,
              values_fill = 0)
# A tibble: 3 × 4
  gene    GSM2545336 GSM2545337 GSM2545338
  <chr>        <dbl>      <dbl>      <dbl>
1 Asl           1170        361        400
2 Apod         36194      10347       9173
3 Cyp2d22          0          0       1603
rna_long<- rna_wide %>%
    pivot_longer(names_to = "sample",
                 values_to = "expression",
                 -gene)
rna_long
# A tibble: 32,428 × 3
   gene  sample     expression
   <chr> <chr>           <dbl>
 1 Asl   GSM2545336       1170
 2 Asl   GSM2545337        361
 3 Asl   GSM2545338        400
 4 Asl   GSM2545339        586
 5 Asl   GSM2545340        626
 6 Asl   GSM2545341        988
 7 Asl   GSM2545342        836
 8 Asl   GSM2545343        535
 9 Asl   GSM2545344        586
10 Asl   GSM2545345        597
# ℹ 32,418 more rows
rna_wide %>%
    pivot_longer(names_to = "sample",
                 values_to = "expression",
                 cols = starts_with("GSM"))
# A tibble: 32,428 × 3
   gene  sample     expression
   <chr> <chr>           <dbl>
 1 Asl   GSM2545336       1170
 2 Asl   GSM2545337        361
 3 Asl   GSM2545338        400
 4 Asl   GSM2545339        586
 5 Asl   GSM2545340        626
 6 Asl   GSM2545341        988
 7 Asl   GSM2545342        836
 8 Asl   GSM2545343        535
 9 Asl   GSM2545344        586
10 Asl   GSM2545345        597
# ℹ 32,418 more rows
rna_wide %>%
    pivot_longer(names_to = "sample",
                 values_to = "expression",
                 GSM2545336:GSM2545380)
# A tibble: 32,428 × 3
   gene  sample     expression
   <chr> <chr>           <dbl>
 1 Asl   GSM2545336       1170
 2 Asl   GSM2545337        361
 3 Asl   GSM2545338        400
 4 Asl   GSM2545339        586
 5 Asl   GSM2545340        626
 6 Asl   GSM2545341        988
 7 Asl   GSM2545342        836
 8 Asl   GSM2545343        535
 9 Asl   GSM2545344        586
10 Asl   GSM2545345        597
# ℹ 32,418 more rows
rna_with_missing_values
# A tibble: 7 × 3
  gene    sample     expression
  <chr>   <chr>           <dbl>
1 Asl     GSM2545336       1170
2 Apod    GSM2545336      36194
3 Asl     GSM2545337        361
4 Apod    GSM2545337      10347
5 Asl     GSM2545338        400
6 Apod    GSM2545338       9173
7 Cyp2d22 GSM2545338       1603
wide_with_NA<- rna_with_missing_values %>%
  pivot_wider(names_from = sample,
              values_from = expression)
wide_with_NA
# A tibble: 3 × 4
  gene    GSM2545336 GSM2545337 GSM2545338
  <chr>        <dbl>      <dbl>      <dbl>
1 Asl           1170        361        400
2 Apod         36194      10347       9173
3 Cyp2d22         NA         NA       1603
wide_with_NA %>%
    pivot_longer(names_to = "sample",
                 values_to = "expression",
                 -gene)
# A tibble: 9 × 3
  gene    sample     expression
  <chr>   <chr>           <dbl>
1 Asl     GSM2545336       1170
2 Asl     GSM2545337        361
3 Asl     GSM2545338        400
4 Apod    GSM2545336      36194
5 Apod    GSM2545337      10347
6 Apod    GSM2545338       9173
7 Cyp2d22 GSM2545336         NA
8 Cyp2d22 GSM2545337         NA
9 Cyp2d22 GSM2545338       1603
rna1<- rna %>%
select(gene, mouse, expression) %>%
pivot_wider(names_from = mouse, values_from = expression)
rna1
# A tibble: 1,474 × 23
   gene     `14`    `9`  `10`  `15`  `18`   `6`   `5`  `11`  `22`  `13`  `23`
   <chr>   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Asl      1170    361   400   586   626   988   836   535   586   597   938
 2 Apod    36194  10347  9173 10620 13021 29594 24959 13668 13230 15868 27769
 3 Cyp2d22  4060   1616  1603  1901  2171  3349  3122  2008  2254  2277  2985
 4 Klk6      287    629   641   578   448   195   186  1101   537   567   327
 5 Fcrls      85    233   244   237   180    38    68   375   199   177    89
 6 Slc2a4    782    231   248   265   313   786   528   249   266   357   654
 7 Exd2     1619   2288  2235  2513  2366  1359  1474  3126  2379  2173  1531
 8 Gjc2      288    595   568   551   310   146   186   791   454   370   240
 9 Plp1    43217 101241 96534 58354 53126 27173 28728 98658 61356 61647 38019
10 Gnb4     1071   1791  1867  1430  1355   798   806  2437  1394  1554   960
# ℹ 1,464 more rows
# ℹ 11 more variables: `24` <dbl>, `8` <dbl>, `7` <dbl>, `1` <dbl>, `16` <dbl>,
#   `21` <dbl>, `4` <dbl>, `2` <dbl>, `20` <dbl>, `12` <dbl>, `19` <dbl>
rna1 %>%
pivot_longer(names_to = "mouse_id", values_to = "counts", -gene)
# A tibble: 32,428 × 3
   gene  mouse_id counts
   <chr> <chr>     <dbl>
 1 Asl   14         1170
 2 Asl   9           361
 3 Asl   10          400
 4 Asl   15          586
 5 Asl   18          626
 6 Asl   6           988
 7 Asl   5           836
 8 Asl   11          535
 9 Asl   22          586
10 Asl   13          597
# ℹ 32,418 more rows
 rna %>%
  filter(chromosome_name == "Y" | chromosome_name == "X") %>%
  group_by(sex, chromosome_name) %>%
  summise(mean = mean(expression))
Error in summise(., mean = mean(expression)): could not find function "summise"
rna_1<- rna %>%
  filter(chromosome_name == "Y" | chromosome_name == "X") %>%
  group_by(sex, chromosome_name) %>%
  summise(mean = mean(expression)) %>%
  pivot_wider(names_from = sex,
              values_from = mean)
Error in summise(., mean = mean(expression)): could not find function "summise"
rna_1
Error in eval(expr, envir, enclos): object 'rna_1' not found
rna_1 %>%
  pivot_longer(names_to = "gender",
               values_to = "mean",
               -chromosome_name)
Error in eval(expr, envir, enclos): object 'rna_1' not found
rna %>%
  group_by(gene, time) %>%
  summarise(mean_exp = mean(expression))
`summarise()` has grouped output by 'gene'. You can override using the
`.groups` argument.
# A tibble: 4,422 × 3
# Groups:   gene [1,474]
   gene      time mean_exp
   <chr>    <dbl>    <dbl>
 1 AI504432     0    1034.
 2 AI504432     4    1104.
 3 AI504432     8    1014
 4 AW046200     0     155.
 5 AW046200     4     152.
 6 AW046200     8      81
 7 AW551984     0     238
 8 AW551984     4     302.
 9 AW551984     8     342.
10 Aamp         0    4603.
# ℹ 4,412 more rows
rna_time<- rna %>%
  group_by(gene, time) %>%
  summarise(mean_exp = mean(expression)) %>%
  pivot_wider(names_from = time,
              values_from = mean_exp)
`summarise()` has grouped output by 'gene'. You can override using the
`.groups` argument.
rna_time
# A tibble: 1,474 × 4
# Groups:   gene [1,474]
   gene         `0`     `4`     `8`
   <chr>      <dbl>   <dbl>   <dbl>
 1 AI504432 1034.   1104.   1014
 2 AW046200  155.    152.     81
 3 AW551984  238     302.    342.
 4 Aamp     4603.   4870    4763.
 5 Abca12      5.29    4.25    4.14
 6 Abcc8    2576.   2609.   2292.
 7 Abhd14a   591.    547.    432.
 8 Abi2     4881.   4903.   4945.
 9 Abi3bp   1175.   1061.    762.
10 Abl2     2170.   2078.   2131.
# ℹ 1,464 more rows
rna %>%
  group_by(gene, time) %>%
  summary(mean_exp = mean(expression)) %>%
  pivot_wider(names_from = time,
              values_from = mean_exp) %>%
  select(gene, 4)
Error in UseMethod("pivot_wider"): no applicable method for 'pivot_wider' applied to an object of class "table"
rna %>%
  group_by(gene, time) %>%
  summarise(mean_exp = mean(expression)) %>%
  pivot_wider(names_from = time,
              values_from = mean_exp) %>%
  select(gene, `4`)
`summarise()` has grouped output by 'gene'. You can override using the
`.groups` argument.
# A tibble: 1,474 × 2
# Groups:   gene [1,474]
   gene         `4`
   <chr>      <dbl>
 1 AI504432 1104.
 2 AW046200  152.
 3 AW551984  302.
 4 Aamp     4870
 5 Abca12      4.25
 6 Abcc8    2609.
 7 Abhd14a   547.
 8 Abi2     4903.
 9 Abi3bp   1061.
10 Abl2     2078.
# ℹ 1,464 more rows
rna %>%
  group_by(gene, time) %>%
  summary(mean_exp = mean(expression)) %>%
  pivot_wider(names_from = time,
              values_from = mean_exp) %>%
  rename("time0" = `0`, "time4" = `4`, "time8" = `8`) %>%
  select(gene, time4)
Error in UseMethod("pivot_wider"): no applicable method for 'pivot_wider' applied to an object of class "table"
rna_time
# A tibble: 1,474 × 4
# Groups:   gene [1,474]
   gene         `0`     `4`     `8`
   <chr>      <dbl>   <dbl>   <dbl>
 1 AI504432 1034.   1104.   1014
 2 AW046200  155.    152.     81
 3 AW551984  238     302.    342.
 4 Aamp     4603.   4870    4763.
 5 Abca12      5.29    4.25    4.14
 6 Abcc8    2576.   2609.   2292.
 7 Abhd14a   591.    547.    432.
 8 Abi2     4881.   4903.   4945.
 9 Abi3bp   1175.   1061.    762.
10 Abl2     2170.   2078.   2131.
# ℹ 1,464 more rows
rna_time %>%
  mutate(time_8_vs_0 = `8` / `0`, time_8_vs_4 = `8` / `4`)
# A tibble: 1,474 × 6
# Groups:   gene [1,474]
   gene         `0`     `4`     `8` time_8_vs_0 time_8_vs_4
   <chr>      <dbl>   <dbl>   <dbl>       <dbl>       <dbl>
 1 AI504432 1034.   1104.   1014          0.981       0.918
 2 AW046200  155.    152.     81          0.522       0.532
 3 AW551984  238     302.    342.         1.44        1.13
 4 Aamp     4603.   4870    4763.         1.03        0.978
 5 Abca12      5.29    4.25    4.14       0.784       0.975
 6 Abcc8    2576.   2609.   2292.         0.889       0.878
 7 Abhd14a   591.    547.    432.         0.731       0.791
 8 Abi2     4881.   4903.   4945.         1.01        1.01
 9 Abi3bp   1175.   1061.    762.         0.649       0.719
10 Abl2     2170.   2078.   2131.         0.982       1.03
# ℹ 1,464 more rows
rna_time %>%
  mutate(time_8_vs_0 = `8` / `0`, time_8_vs_4 = `8` / `4`) %>%
  pivot_longer(names_to = "comparisons",
               values_to = "Fold_changes",
               time_8_vs_0:time_8_vs_4)
# A tibble: 2,948 × 6
# Groups:   gene [1,474]
   gene         `0`     `4`     `8` comparisons Fold_changes
   <chr>      <dbl>   <dbl>   <dbl> <chr>              <dbl>
 1 AI504432 1034.   1104.   1014    time_8_vs_0        0.981
 2 AI504432 1034.   1104.   1014    time_8_vs_4        0.918
 3 AW046200  155.    152.     81    time_8_vs_0        0.522
 4 AW046200  155.    152.     81    time_8_vs_4        0.532
 5 AW551984  238     302.    342.   time_8_vs_0        1.44
 6 AW551984  238     302.    342.   time_8_vs_4        1.13
 7 Aamp     4603.   4870    4763.   time_8_vs_0        1.03
 8 Aamp     4603.   4870    4763.   time_8_vs_4        0.978
 9 Abca12      5.29    4.25    4.14 time_8_vs_0        0.784
10 Abca12      5.29    4.25    4.14 time_8_vs_4        0.975
# ℹ 2,938 more rows
rna_mini<- rna %>%
   select(gene, sample, expression) %>%
   head(10)
rna_mini
# A tibble: 10 × 3
   gene    sample     expression
   <chr>   <chr>           <dbl>
 1 Asl     GSM2545336       1170
 2 Apod    GSM2545336      36194
 3 Cyp2d22 GSM2545336       4060
 4 Klk6    GSM2545336        287
 5 Fcrls   GSM2545336         85
 6 Slc2a4  GSM2545336        782
 7 Exd2    GSM2545336       1619
 8 Gjc2    GSM2545336        288
 9 Plp1    GSM2545336      43217
10 Gnb4    GSM2545336       1071
download.file(url = "https://carpentries-incubator.github.io/bioc-intro/data/annot1.csv",
              destfile = "data/annot1.csv")
annot1 <- read_csv(file = "data/annot1.csv")
annot1
# A tibble: 10 × 2
   gene    gene_description
   <chr>   <chr>
 1 Cyp2d22 cytochrome P450, family 2, subfamily d, polypeptide 22 [Source:MGI S…
 2 Klk6    kallikrein related-peptidase 6 [Source:MGI Symbol;Acc:MGI:1343166]
 3 Fcrls   Fc receptor-like S, scavenger receptor [Source:MGI Symbol;Acc:MGI:19…
 4 Plp1    proteolipid protein (myelin) 1 [Source:MGI Symbol;Acc:MGI:97623]
 5 Exd2    exonuclease 3'-5' domain containing 2 [Source:MGI Symbol;Acc:MGI:192…
 6 Apod    apolipoprotein D [Source:MGI Symbol;Acc:MGI:88056]
 7 Gnb4    guanine nucleotide binding protein (G protein), beta 4 [Source:MGI S…
 8 Slc2a4  solute carrier family 2 (facilitated glucose transporter), member 4 …
 9 Asl     argininosuccinate lyase [Source:MGI Symbol;Acc:MGI:88084]
10 Gjc2    gap junction protein, gamma 2 [Source:MGI Symbol;Acc:MGI:2153060]    
full_join(rna_mini, annot1)
Joining with `by = join_by(gene)`
# A tibble: 10 × 4
   gene    sample     expression gene_description
   <chr>   <chr>           <dbl> <chr>
 1 Asl     GSM2545336       1170 argininosuccinate lyase [Source:MGI Symbol;Acc…
 2 Apod    GSM2545336      36194 apolipoprotein D [Source:MGI Symbol;Acc:MGI:88…
 3 Cyp2d22 GSM2545336       4060 cytochrome P450, family 2, subfamily d, polype…
 4 Klk6    GSM2545336        287 kallikrein related-peptidase 6 [Source:MGI Sym…
 5 Fcrls   GSM2545336         85 Fc receptor-like S, scavenger receptor [Source…
 6 Slc2a4  GSM2545336        782 solute carrier family 2 (facilitated glucose t…
 7 Exd2    GSM2545336       1619 exonuclease 3'-5' domain containing 2 [Source:…
 8 Gjc2    GSM2545336        288 gap junction protein, gamma 2 [Source:MGI Symb…
 9 Plp1    GSM2545336      43217 proteolipid protein (myelin) 1 [Source:MGI Sym…
10 Gnb4    GSM2545336       1071 guanine nucleotide binding protein (G protein)…
download.file(url = "https://carpentries-incubator.github.io/bioc-intro/data/annot2.csv",
              destfile = "data/annot2.csv")
annot2 <- read_csv(file = "data/annot2.csv")
annot2
# A tibble: 10 × 2
   external_gene_name description
   <chr>              <chr>
 1 Cyp2d22            cytochrome P450, family 2, subfamily d, polypeptide 22 [S…
 2 Klk6               kallikrein related-peptidase 6 [Source:MGI Symbol;Acc:MGI…
 3 Fcrls              Fc receptor-like S, scavenger receptor [Source:MGI Symbol…
 4 Plp1               proteolipid protein (myelin) 1 [Source:MGI Symbol;Acc:MGI…
 5 Exd2               exonuclease 3'-5' domain containing 2 [Source:MGI Symbol;…
 6 Apod               apolipoprotein D [Source:MGI Symbol;Acc:MGI:88056]
 7 Gnb4               guanine nucleotide binding protein (G protein), beta 4 [S…
 8 Slc2a4             solute carrier family 2 (facilitated glucose transporter)…
 9 Asl                argininosuccinate lyase [Source:MGI Symbol;Acc:MGI:88084]
10 Gjc2               gap junction protein, gamma 2 [Source:MGI Symbol;Acc:MGI:…
full_join(rna_mini, annot2, by = c("gene" = "external_gene_name"))
# A tibble: 10 × 4
   gene    sample     expression description
   <chr>   <chr>           <dbl> <chr>
 1 Asl     GSM2545336       1170 argininosuccinate lyase [Source:MGI Symbol;Acc…
 2 Apod    GSM2545336      36194 apolipoprotein D [Source:MGI Symbol;Acc:MGI:88…
 3 Cyp2d22 GSM2545336       4060 cytochrome P450, family 2, subfamily d, polype…
 4 Klk6    GSM2545336        287 kallikrein related-peptidase 6 [Source:MGI Sym…
 5 Fcrls   GSM2545336         85 Fc receptor-like S, scavenger receptor [Source…
 6 Slc2a4  GSM2545336        782 solute carrier family 2 (facilitated glucose t…
 7 Exd2    GSM2545336       1619 exonuclease 3'-5' domain containing 2 [Source:…
 8 Gjc2    GSM2545336        288 gap junction protein, gamma 2 [Source:MGI Symb…
 9 Plp1    GSM2545336      43217 proteolipid protein (myelin) 1 [Source:MGI Sym…
10 Gnb4    GSM2545336       1071 guanine nucleotide binding protein (G protein)…
annot3 <- read_csv("data/annot3.csv")
full_join(rna_mini, annot3)
# A tibble: 15 × 4
   gene    sample     expression gene_description
   <chr>   <chr>           <dbl> <chr>
 1 Asl     GSM2545336       1170 argininosuccinate lyase [Source:MGI Symbol;Acc…
 2 Apod    GSM2545336      36194 apolipoprotein D [Source:MGI Symbol;Acc:MGI:88…
 3 Cyp2d22 GSM2545336       4060 cytochrome P450, family 2, subfamily d, polype…
 4 Klk6    GSM2545336        287 <NA>
 5 Fcrls   GSM2545336         85 Fc receptor-like S, scavenger receptor [Source…
 6 Slc2a4  GSM2545336        782 solute carrier family 2 (facilitated glucose t…
 7 Exd2    GSM2545336       1619 exonuclease 3'-5' domain containing 2 [Source:…
 8 Gjc2    GSM2545336        288 gap junction protein, gamma 2 [Source:MGI Symb…
 9 Plp1    GSM2545336      43217 proteolipid protein (myelin) 1 [Source:MGI Sym…
10 Gnb4    GSM2545336       1071 guanine nucleotide binding protein (G protein)…
11 mt-Tf   <NA>               NA mitochondrially encoded tRNA phenylalanine [So…
12 mt-Rnr1 <NA>               NA mitochondrially encoded 12S rRNA [Source:MGI S…
13 mt-Tv   <NA>               NA mitochondrially encoded tRNA valine [Source:MG…
14 mt-Rnr2 <NA>               NA mitochondrially encoded 16S rRNA [Source:MGI S…
15 mt-Tl1  <NA>               NA mitochondrially encoded tRNA leucine 1 [Source…
write_csv(rna_wide, file = "data_output/rna_wide.csv")
library("tidyverse")
rna <- read.csv("data/rnaseq.csv")
ggplot(data =<DATA>, mapping = aes(<MAPPINGS>)) +<GEOM_FUNCTION>()
ggplot(data = rna)
ggplot(data = rna, mapping = aes(x = expression))
* 散布図やドットプロットなどには `geom_point()` を使用する。
*
* `geom_boxplot()` ボックスプロット！
* トレンドライン、時系列など。
ggplot(data = rna, mapping = aes(x = expression)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Assign plot to a variable
rna_plot <- ggplot(data = rna,
                   mapping = aes(x = expression))

# Draw the plot
rna_plot + geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# change bins
ggplot(rna, aes(x = expression)) +
    geom_histogram(bins = 15)
# change binwidth
ggplot(rna, aes(x = expression)) +
    geom_histogram(binwidth = 2000)
rna<- rna %>%
  mutate(expression_log = log2(expression + 1))
ggplot(rna, aes(x = expression_log)) + geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data = rna,mapping = aes(x = expression))+
  geom_histogram() +
  scale_x_log10()
Warning in scale_x_log10(): log-10 transformation introduced infinite values.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 507 rows containing non-finite outside the scale range
(`stat_bin()`).
# これはレイヤーを追加するための正しい構文です
rna_plot +
  geom_histogram()

# これは新しいレイヤーを追加せず、エラーメッセージを返します
rna_plot
  + geom_histogram()
rna_fc <- rna %>% select(gene, time,
                         gene_biotype, expression_log) %>%
  group_by(gene, time, gene_biotype) %>%
  summarize(mean_exp = mean(expression_log)) %>%
  pivot_wider(names_from = time,
              values_from = mean_exp) %>%
  mutate(time_8_vs_0 = `8` - `0`, time_4_vs_0 = `4` - `0`)
`summarise()` has grouped output by 'gene', 'time'. You can override using the
`.groups` argument.
ggplot(data = rna_fc, mapping = aes(x = time_4_vs_0, y = time_8_vs_0)) +
  geom_point()
ggplot(data = rna_fc, mapping = aes(x = time_4_vs_0, y = time_8_vs_0)) +
  geom_point(alpha = 0.3)
ggplot(data = rna_fc, mapping = aes(x = time_4_vs_0, y = time_8_vs_0)) +
  geom_point(alpha = 0.3, color = "blue")
ggplot(data = rna_fc, mapping = aes(x = time_4_vs_0, y = time_8_vs_0)) +
  geom_point(alpha = 0.3, aes(color = gene_biotype))
ggplot(data = rna_fc, mapping = aes(x = time_4_vs_0, y = time_8_vs_0,
                                color = gene_biotype)) +
  geom_point(alpha = 0.3)
ggplot(data = rna_fc, mapping = aes(x = time_4_vs_0, y = time_8_vs_0,
                                color = gene_biotype)) +
  geom_point(alpha = 0.3) +
  geom_abline(intercept = 0)
ggplot(data = rna_fc, mapping = aes(x = time_4_vs_0, y = time_8_vs_0,
                                color = gene_biotype)) +
  geom_jitter(alpha = 0.3) +
  geom_abline(intercept = 0)
Error in library("hexbin"): there is no package called 'hexbin'
install.packages("hexbin")
library("hexbin")
Error in library("hexbin"): there is no package called 'hexbin'
ggplot(data = rna_fc, mapping = aes(x = time_4_vs_0, y = time_8_vs_0)) +
  geom_hex() +
  geom_abline(intercept = 0)
Warning: Computation failed in `stat_binhex()`.
Caused by error in `compute_group()`:
! The package "hexbin" is required for `stat_bin_hex()`.
ggplot(data = rna, mapping = aes(y = expression_log, x = sample)) +
    geom_point(aes(color = time))
ggplot(data = rna,
         mapping = aes(y = expression_log, x = sample)) +
  geom_boxplot()
ggplot(data = rna,
         mapping = aes(y = expression_log, x = sample)) +
  geom_jitter(alpha = 0.2, color = "tomato") +
  geom_boxplot(alpha = 0)
ggplot(data = rna,
         mapping = aes(y = expression_log, x = sample))+
  geom_boxplot(alpha = 0) +
  geom_jitter(alpha = 0.2, color = "tomato")
ggplot(data = rna,
         mapping = aes(y = expression_log, x = sample))+
  geom_jitter(alpha = 0.2, color = "tomato") +
  geom_boxplot(alpha = 0) +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
# time as integer
ggplot(data = rna,
         mapping = aes(y = expression_log,
                       x = sample))+
  geom_jitter(alpha = 0.2, aes(color = time))+
  geom_boxplot(alpha = 0) +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
# time as factor
ggplot(data = rna,
         mapping = aes(y = expression_log,
                       x = sample))+
  geom_jitter(alpha = 0.2, aes(color = as.factor(time)))+
  geom_boxplot(alpha = 0) +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
ggplot(data = rna,
         mapping = aes(y = expression_log, x = sample))+
  geom_violin(aes(fill = as.factor(time)))+
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
ggplot(data = rna,
         mapping = aes(y = expression_log, x = sample))+
  geom_violin(aes(fill = sex))+
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))
rna_fc<- rna_fc %>% arrange(desc(time_8_vs_0))

genes_selected <- rna_fc$gene[1:10]

sub_rna<- rna %>%
    filter(gene %in% genes_selected)

mean_exp_by_time<- sub_rna %>%
  group_by(gene,time) %>%
    summarize(mean_exp = mean(expression_log))
`summarise()` has grouped output by 'gene'. You can override using the
`.groups` argument.
mean_exp_by_time
# A tibble: 30 × 3
# Groups:   gene [10]
   gene   time mean_exp
   <chr> <int>    <dbl>
 1 Acr       0     5.07
 2 Acr       4     5.54
 3 Acr       8     7.31
 4 Aipl1     0     3.70
 5 Aipl1     4     3.89
 6 Aipl1     8     6.56
 7 Bst1      0     3.20
 8 Bst1      4     3.77
 9 Bst1      8     5.22
10 Chil3     0     4.00
# ℹ 20 more rows
ggplot(data = mean_exp_by_time, mapping = aes(x = time, y = mean_exp))+
  geom_line()
ggplot(data = mean_exp_by_time,
       mapping = aes(x = time, y = mean_exp, group = gene))+
  geom_line()
ggplot(data = mean_exp_by_time,
       mapping = aes(x = time, y = mean_exp, color = gene))+
  geom_line()
ggplot(data = mean_exp_by_time,
       mapping = aes(x = time, y = mean_exp))+ geom_line() +
  facet_wrap(~ gene)
ggplot(data = mean_exp_by_time,
       mapping = aes(x = time, y = mean_exp))+
  geom_line() +
  facet_wrap(~ gene, scales = "free_y")
mean_exp_by_time_sex<- sub_rna %>%
  group_by(gene, time, sex) %>%
    summarize(mean_exp = mean(expression_log))
`summarise()` has grouped output by 'gene', 'time'. You can override using the
`.groups` argument.
mean_exp_by_time_sex
# A tibble: 60 × 4
# Groups:   gene, time [30]
   gene   time sex    mean_exp
   <chr> <int> <chr>     <dbl>
 1 Acr       0 Female     5.13
 2 Acr       0 Male       5.00
 3 Acr       4 Female     5.93
 4 Acr       4 Male       5.15
 5 Acr       8 Female     7.27
 6 Acr       8 Male       7.36
 7 Aipl1     0 Female     3.67
 8 Aipl1     0 Male       3.73
 9 Aipl1     4 Female     4.07
10 Aipl1     4 Male       3.72
# ℹ 50 more rows
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex))+
  geom_line() +
  facet_wrap(~ gene, scales = "free_y")
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex))+
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank())
mean_exp_by_chromosome<- rna %>%
  group_by(chromosome_name, time) %>%
  summary(mean_exp = mean(expression_log))

ggplot(data = mean_exp_by_chromosome, mapping = aes(x = time,
                                y = mean_exp))+
  geom_line() +
  facet_wrap(~ chromosome_name, scales = "free_y")
Error in `fortify()`:
! `data` must be a <data.frame>, or an object coercible by `fortify()`,
  or a valid <data.frame>-like object coercible by `as.data.frame()`.
Caused by error in `.postvalidate_data_frame_like_object()`:
! `as.data.frame(data)` must preserve dimensions.
# 1列、行ごとのファセット
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = gene))+
  geom_line() +
  facet_grid(sex ~ .)
# 1行、列ごとのファセット
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = gene))+
  geom_line() +
  facet_grid(. ~ sex)
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex))+
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  labs(title = "感染期間別の平均遺伝子発現",
       x = "感染期間（日単位）",
       y = "平均遺伝子発現")
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex))+
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  labs(title = "感染期間別の平均遺伝子発現",
       x = "感染期間（日単位）",
       y = "平均遺伝子発現") +
  theme(text = element_text(size = 16))
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex))+
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  labs(title = "感染期間別の平均遺伝子発現",
       x = "感染期間（日単位）",
       y = "平均遺伝子発現") +
  theme(text = element_text(size = 16),
        axis.text.x = element_text(color = "royalblue4", size = 12),
        axis.text.y = element_text(color = "royalblue4", size = 12),
        panel.grid = element_line(color="lightsteelblue1"),
        legend.position = "top")
blue_theme <- theme(axis.text.x = element_text(colour = "royalblue4",
                                               size = 12),
                    axis.text.y = element_text(colour = "royalblue4",
                                               size = 12),
                    text = element_text(size = 16),
                    panel.grid = element_line(colour="lightsteelblue1"))

ggplot(rna, aes(x = expression_log)) +
  geom_histogram(bins = 20) +
    blue_theme
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex))+
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank())
# change the thickness of the lines
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex)) +
  geom_line(size=1.5) +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank())
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
# change the name of the legend and the labels
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex)) +
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  scale_color_discrete(name = "Gender", labels = c("F", "M"))
# using a different color palette
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex)) +
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  scale_color_brewer(name = "Gender", labels = c("F", "M"), palette = "Dark2")
# manually specifying the colors
ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex)) +
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  scale_color_manual(name = "Gender",  labels = c("F", "M"),
                     values = c("royalblue", "deeppink"))
rna$chromosome_name <- factor(rna$chromosome_name,
                               levels = c(1:19,"X","Y"))

count_gene_chromosome <- rna %>% select(chromosome_name, gene) %>%
  distinct() %>% ggplot() +
  geom_bar(aes(x = chromosome_name), fill = "seagreen",
           position = "dodge", stat = "count") +
  labs(y = "log10(n genes)", x = "chromosome") +
  scale_y_log10()

count_gene_chromosome
exp_boxplot_sex <- ggplot(rna, aes(y=expression_log, x = as.factor(time),
                 color=sex)) +
   geom_boxplot(alpha = 0) +
  labs(y = "Mean gene exp",
       x = "time") + theme(legend.position = "none")

exp_boxplot_sex
install.packages("patchwork")
library("patchwork")
Error in library("patchwork"): there is no package called 'patchwork'
count_gene_chromosome + exp_boxplot_sex
Error in `ggplot_add()`:
! Can't add `exp_boxplot_sex` to a <ggplot> object.
## or count_gene_chromosome | exp_boxplot_sex
count_gene_chromosome / exp_boxplot_sex
Error in count_gene_chromosome/exp_boxplot_sex: non-numeric argument to binary operator
count_gene_chromosome + exp_boxplot_sex + plot_layout(ncol = 1)
Error in `ggplot_add()`:
! Can't add `exp_boxplot_sex` to a <ggplot> object.
count_gene_chromosome +
 (count_gene_chromosome + exp_boxplot_sex) +
 exp_boxplot_sex +
 plot_layout(ncol = 1)
Error in `ggplot_add()`:
! Can't add `exp_boxplot_sex` to a <ggplot> object.
count_gene_chromosome /
 (count_gene_chromosome | exp_boxplot_sex) /
 exp_boxplot_sex
Error in count_gene_chromosome | exp_boxplot_sex: operations are possible only for numeric, logical or complex types
install.packages("gridExtra")
library("gridExtra")
Error in library("gridExtra"): there is no package called 'gridExtra'
grid.arrange(count_gene_chromosome, exp_boxplot_sex, ncol = 2)
Error in grid.arrange(count_gene_chromosome, exp_boxplot_sex, ncol = 2): could not find function "grid.arrange"
my_plot <- ggplot(data = mean_exp_by_time_sex,
       mapping = aes(x = time, y = mean_exp, color = sex)) +
  geom_line() +
  facet_wrap(~ gene, scales = "free_y") +
  labs(title = "Mean gene expression by duration of the infection",
         x = "Duration of the infection (in days)",
         y = "Mean gene expression") +
  guides(color=guide_legend(title="Gender")) +
  theme_bw() +
  theme(axis.text.x = element_text(colour = "royalblue4", size = 12),
        axis.text.y = element_text(colour = "royalblue4", size = 12),
        text = element_text(size = 16),
        panel.grid = element_line(colour="lightsteelblue1"),
        legend.position = "top")
ggsave("fig_output/mean_exp_by_time_sex.png", my_plot, width = 15,
       height = 10)

# This also works for grid.arrange() plots
combo_plot <- grid.arrange(count_gene_chromosome, exp_boxplot_sex,
                           ncol = 2, widths = c(4, 6))
ggsave("fig_output/combo_plot_chromosome_sex.png", combo_plot,
       width = 10, dpi = 300)
par(mfrow = c(1, 3))
plot(1:20, main = "First layer, produced with plot(1:20)")

plot(1:20, main = "A horizontal red line, added with abline(h = 10)")
abline(h = 10, col = "red")

plot(1:20, main = "A rectangle, added with rect(5, 5, 15, 15)")
abline(h = 10, col = "red")
rect(5, 5, 15, 15, lwd = 3)
par(mfrow = c(2, 2))
boxplot(rnorm(100),
        main = "rnorm(100) の Boxplot")
boxplot(matrix(rnorm(100), ncol = 10),
        main = "matrix(rnorm(100), ncol = 10) の Boxplot")
hist(rnorm(100))
hist(matrix(rnorm(100), ncol = 10))
count_matrix <- read.csv("data/count_matrix.csv",
                         row.names = 1) %>%
    as.matrix()

count_matrix[1:5, ]
        GSM2545336 GSM2545337 GSM2545338 GSM2545339 GSM2545340 GSM2545341
Asl           1170        361        400        586        626        988
Apod         36194      10347       9173      10620      13021      29594
Cyp2d22       4060       1616       1603       1901       2171       3349
Klk6           287        629        641        578        448        195
Fcrls           85        233        244        237        180         38
        GSM2545342 GSM2545343 GSM2545344 GSM2545345 GSM2545346 GSM2545347
Asl            836        535        586        597        938       1035
Apod         24959      13668      13230      15868      27769      34301
Cyp2d22       3122       2008       2254       2277       2985       3452
Klk6           186       1101        537        567        327        233
Fcrls           68        375        199        177         89         67
        GSM2545348 GSM2545349 GSM2545350 GSM2545351 GSM2545352 GSM2545353
Asl            494        481        666        937        803        541
Apod         11258      11812      15816      29242      20415      13682
Cyp2d22       1883       2014       2417       3678       2920       2216
Klk6           742        881        828        250        798        710
Fcrls          300        233        231         81        303        285
        GSM2545354 GSM2545362 GSM2545363 GSM2545380
Asl            473        748        576       1192
Apod         11088      15916      11166      38148
Cyp2d22       1821       2842       2011       4019
Klk6           894        501        598        259
Fcrls          248        179        184         68
dim(count_matrix)
[1] 1474   22
sample_metadata <- read.csv("data/sample_metadata.csv")
sample_metadata
       sample     organism age    sex   infection  strain time     tissue mouse
1  GSM2545336 Mus musculus   8 Female  InfluenzaA C57BL/6    8 Cerebellum    14
2  GSM2545337 Mus musculus   8 Female NonInfected C57BL/6    0 Cerebellum     9
3  GSM2545338 Mus musculus   8 Female NonInfected C57BL/6    0 Cerebellum    10
4  GSM2545339 Mus musculus   8 Female  InfluenzaA C57BL/6    4 Cerebellum    15
5  GSM2545340 Mus musculus   8   Male  InfluenzaA C57BL/6    4 Cerebellum    18
6  GSM2545341 Mus musculus   8   Male  InfluenzaA C57BL/6    8 Cerebellum     6
7  GSM2545342 Mus musculus   8 Female  InfluenzaA C57BL/6    8 Cerebellum     5
8  GSM2545343 Mus musculus   8   Male NonInfected C57BL/6    0 Cerebellum    11
9  GSM2545344 Mus musculus   8 Female  InfluenzaA C57BL/6    4 Cerebellum    22
10 GSM2545345 Mus musculus   8   Male  InfluenzaA C57BL/6    4 Cerebellum    13
11 GSM2545346 Mus musculus   8   Male  InfluenzaA C57BL/6    8 Cerebellum    23
12 GSM2545347 Mus musculus   8   Male  InfluenzaA C57BL/6    8 Cerebellum    24
13 GSM2545348 Mus musculus   8 Female NonInfected C57BL/6    0 Cerebellum     8
14 GSM2545349 Mus musculus   8   Male NonInfected C57BL/6    0 Cerebellum     7
15 GSM2545350 Mus musculus   8   Male  InfluenzaA C57BL/6    4 Cerebellum     1
16 GSM2545351 Mus musculus   8 Female  InfluenzaA C57BL/6    8 Cerebellum    16
17 GSM2545352 Mus musculus   8 Female  InfluenzaA C57BL/6    4 Cerebellum    21
18 GSM2545353 Mus musculus   8 Female NonInfected C57BL/6    0 Cerebellum     4
19 GSM2545354 Mus musculus   8   Male NonInfected C57BL/6    0 Cerebellum     2
20 GSM2545362 Mus musculus   8 Female  InfluenzaA C57BL/6    4 Cerebellum    20
21 GSM2545363 Mus musculus   8   Male  InfluenzaA C57BL/6    4 Cerebellum    12
22 GSM2545380 Mus musculus   8 Female  InfluenzaA C57BL/6    8 Cerebellum    19
dim(sample_metadata)
[1] 22  9
gene_metadata <- read.csv("data/gene_metadata.csv")
gene_metadata[1:10, 1:4]
      gene ENTREZID
1      Asl   109900
2     Apod    11815
3  Cyp2d22    56448
4     Klk6    19144
5    Fcrls    80891
6   Slc2a4    20528
7     Exd2    97827
8     Gjc2   118454
9     Plp1    18823
10    Gnb4    14696
                                                                         product
1                                 argininosuccinate lyase, transcript variant X1
2                                         apolipoprotein D, transcript variant 3
3   cytochrome P450, family 2, subfamily d, polypeptide 22, transcript variant 2
4                           kallikrein related-peptidase 6, transcript variant 2
5                  Fc receptor-like S, scavenger receptor, transcript variant X1
6            solute carrier family 2 (facilitated glucose transporter), member 4
7                                          exonuclease 3'-5' domain containing 2
8                            gap junction protein, gamma 2, transcript variant 1
9                           proteolipid protein (myelin) 1, transcript variant 1
10 guanine nucleotide binding protein (G protein), beta 4, transcript variant X2
      ensembl_gene_id
1  ENSMUSG00000025533
2  ENSMUSG00000022548
3  ENSMUSG00000061740
4  ENSMUSG00000050063
5  ENSMUSG00000015852
6  ENSMUSG00000018566
7  ENSMUSG00000032705
8  ENSMUSG00000043448
9  ENSMUSG00000031425
10 ENSMUSG00000027669
dim(gene_metadata)
[1] 1474    9
## BiocManager::install("SummarizedExperiment")
library("SummarizedExperiment")
Error in library("SummarizedExperiment"): there is no package called 'SummarizedExperiment'
stopifnot(rownames(count_matrix) == gene_metadata$gene)
stopifnot(colnames(count_matrix) == sample_metadata$sample)
se <- SummarizedExperiment(assays = list(counts = count_matrix),
                           colData = sample_metadata,
                           rowData = gene_metadata)
Error in SummarizedExperiment(assays = list(counts = count_matrix), colData = sample_metadata, : could not find function "SummarizedExperiment"
se
Error in eval(expr, envir, enclos): object 'se' not found
saveRDS(se, file = "data_output/se.rds")
rm(se)
se <- readRDS("data_output/se.rds")
head(se)
head(assay(se))
Error in assay(se): could not find function "assay"
dim(assay(se))
Error in assay(se): could not find function "assay"
colData(se)
Error in colData(se): could not find function "colData"
dim(colData(se))
Error in colData(se): could not find function "colData"
head(rowData(se))
Error in rowData(se): could not find function "rowData"
dim(rowData(se))
Error in rowData(se): could not find function "rowData"
se1 <- se[1:5, 1:3]
Error in eval(expr, envir, enclos): object 'se' not found
se1
Error in eval(expr, envir, enclos): object 'se1' not found
colData(se1)
Error in colData(se1): could not find function "colData"
rowData(se1)
Error in rowData(se1): could not find function "rowData"
se1 <- se[rowData(se)$gene_biotype == "miRNA",
          colData(se)$infection == "NonInfected"]
Error in eval(expr, envir, enclos): object 'se' not found
se1
Error in eval(expr, envir, enclos): object 'se1' not found
assay(se1)
Error in assay(se1): could not find function "assay"
colData(se1)
Error in colData(se1): could not find function "colData"
rowData(se1)
Error in rowData(se1): could not find function "rowData"
assay(se)[1:3, colData(se)$time != 4]
Error in assay(se): could not find function "assay"
# Equivalent to
assay(se)[1:3, colData(se)$time == 0 | colData(se)$time == 8]
Error in assay(se): could not find function "assay"
rna |>
    filter(gene %in% c("Asl", "Apod", "Cyd2d22"))|>
    filter(time != 4) |> select(expression)
# A tibble: 28 × 1
   expression
        <dbl>
 1       1170
 2      36194
 3        361
 4      10347
 5        400
 6       9173
 7        988
 8      29594
 9        836
10      24959
# ℹ 18 more rows
colData(se)$center <- rep("University of Illinois", nrow(colData(se)))
Error in colData(se): could not find function "colData"
colData(se)
Error in colData(se): could not find function "colData"
シー
Error in eval(expr, envir, enclos): object 'シー' not found
#BiocManager::install("tidySummarizedExperiment")
library("tidySummarizedExperiment")
Error in library("tidySummarizedExperiment"): there is no package called 'tidySummarizedExperiment'
se
Error in eval(expr, envir, enclos): object 'se' not found
options("restore_SummarizedExperiment_show" = TRUE)
se
Error in eval(expr, envir, enclos): object 'se' not found
options("restore_SummarizedExperiment_show" = FALSE)
se
Error in eval(expr, envir, enclos): object 'se' not found
se %>% filter(.sample == "GSM2545336")
Error in eval(expr, envir, enclos): object 'se' not found
se %>% select(.sample)
Error in eval(expr, envir, enclos): object 'se' not found
se %>% mutate(center = "ハイデルベルク大学")
Error in eval(expr, envir, enclos): object 'se' not found
se %>%
    group_by(.sample) %>%
    summarise(total_counts=sum(counts))
Error in eval(expr, envir, enclos): object 'se' not found
se %>%
    ggplot(aes(counts + 1, group=.sample, color=infection))+
    geom_density() +
    scale_x_log10() +
    theme_bw()
Error in eval(expr, envir, enclos): object 'se' not found

いい名前	良い代替品	避ける
最高_温度_C	最大温度	最高温度 (°C)
降水量_mm	降水量	プレcmm
平均_年_成長	平均年成長	平均成長率/年
sex	セックス	男/女
weight	重さ	w。
セル_タイプ	セルタイプ	細胞の種類
観察_01	最初の_観察	1回目の観測

コラム	説明
遺伝子	測定された遺伝子名
サンプル	遺伝子発現を測定したサンプル名
表現	遺伝子発現の値
有機体	生物／種 - ここではすべてのデータはマウスに由来する
年齢	マウスの年齢（ここではすべてのマウスが8週齢であった）
セックス	マウスの性別
感染症	マウスの感染状態、すなわちA型インフルエンザに感染しているか、感染していないか。
緊張	インフルエンザA型。
時間	感染期間（日単位）。
組織	遺伝子発現実験に使用した組織、すなわち小脳または脊髄。
マウス	マウス固有の識別子。

概要

質問

目的

表計算プログラム

スプレッドシートの概要

このレッスンで教えられないこと

なぜスプレッドシートでのデータ分析を教えないのか

課題: 隣人と次の点について話し合ってください。

スプレッドシートの問題

データ入力とクリーニングにスプレッドシートを使用する

スプレッドシートでのデータテーブルの書式設定

分析を追跡する

スプレッドシートでのデータの構造化

課題: 乱雑なデータセットを取り上げ、それをクリーンアップする方法を説明します。

課題: データを整理したら、次の質問に答えてください。

よくあるスプレッドシートのエラー

複数のテーブルの使用

複数のタブの使用

ゼロを埋めない

問題のある null 値 {#null}の使用

フォーマットを使用して情報を伝える

書式設定を使用してデータシートを美しく見せる {#formatting_pretty}

セル {#units}にコメントまたはユニットを配置する

セル {#info}に複数の情報を入力する

問題のあるフィールド名 {#field_name} の使用

データ {#special}での特殊文字の使用

データテーブル {#metadata}へのメタデータの組み込み

データのエクスポート

エラー

カンマに関する注意事項

エラー

まとめ

まとめ

概要

質問

目的

Rとは？ RStudioとは何ですか?

なぜRを学ぶのか？

Rはポインティングやクリックを多用しない。

Rコードは再現性に優れている

Rは学際的で拡張性がある

Rはあらゆる形や大きさのデータを扱う

Rは高品質のグラフィックを作成する

Rは大きく歓迎されるコミュニティ

Rは無料であるだけでなく、オープンソースでクロスプラットフォームである。

RStudioを使いこなす

セットアップ

作業ディレクトリの整理

課題：プロジェクトのディレクトリ構造を作る

作業ディレクトリ

Rとの対話

コース中やコース終了後にさらに学ぶには？

助けを求める

RStudio 組み込みのヘルプインターフェイスを使用して、R 関数の詳細情報を検索します。

使いたい関数の名前はわかっているが、その使い方がわからない。

R

R

Xを行う関数を使いたい。そのための関数があるはずだが、どれがあるのかわからない…。

R

動けないんだ…。 理解できないエラーメッセージが表示されます。

助けを求める

R

出力

R

R

R

出力

どこに助けを求めればいいのか？

その他のリソース

Rパッケージ

荷物の積み込み

R

パッケージのインストール

R

R

R

まとめ

概要

質問

目的

動けないんだ…。理解できないエラーメッセージが表示されます。