Consider the following data:
> df
   nt_1 NB1949j NB2021j NB3366j  NB1949f NB2021f NB3366f nt_2
1     1       1       1       1        1       0       1    1
2     0       0       1       0        1       1       0    0
3     1       1       0       0        0       0       0    1
4     0       0       1       1        1       1       1    0
5     0       0       1       1        1       1       0    0
6     1       1       1       0        1       0       0    1
7     1       1       0       0        0       1       0    1
8     0       0       1       0        0       0       1    0
9     0       0       0       0        0       1       0    0
10   -1       1       1       1        1       1       1   -1
How can I calculate the following variables:
NB1949jf=NB1949j+NB1949f
NB2021jf=NB2021j+NB2021f
NB3366jf=NB3366j+NB3366f
I have a large number of variables that start with "NB" and end with "j" or "f". Each variable name is "NB" + a unique code (like 1949) + "j" or "f". I want to sum the columns that share the same unique code (like 1949). (This requires extracting the unique codes from the data frame's column names.)
library(dplyr)
# Example data: ten rows holding nt_1 and nt_2 plus three pairs of
# "NB<code>j" / "NB<code>f" columns (codes 1949, 2021 and 3366).
df <- data.frame(
  nt_1    = c(1, 0, 1, 0, 0, 1, 1, 0, 0, -1),
  NB1949j = c(1, 0, 1, 0, 0, 1, 1, 0, 0, 1),
  NB2021j = c(1, 1, 0, 1, 1, 1, 0, 1, 0, 1),
  NB3366j = c(1, 0, 0, 1, 1, 0, 0, 0, 0, 1),
  NB1949f = c(1, 1, 0, 1, 1, 1, 0, 0, 0, 1),
  NB2021f = c(0, 1, 0, 1, 1, 0, 1, 0, 1, 1),
  NB3366f = c(1, 0, 0, 1, 0, 0, 0, 1, 0, 1),
  nt_2    = c(1, 0, 1, 0, 0, 1, 1, 0, 0, -1)
)
NB1949jf=NB1949j+NB1949f
NB2021jf=NB2021j+NB2021f
NB3366jf=NB3366j+NB3366f
I think the first step is to select the columns:
# Keep only the columns whose names BOTH start with "NB" AND end in "j" or
# "f". Separate arguments to select() are unioned, so the two conditions are
# combined with `&` inside a single selection expression; otherwise any
# column merely ending in "j"/"f" (or merely starting with "NB") would be
# kept as well.
df2 <- df %>%
  select(starts_with("NB") & (ends_with("j") | ends_with("f")))
df2
   NB1949j NB2021j NB3366j  NB1949f NB2021f NB3366f
1        1       1       1        1       0       1
2        0       1       0        1       1       0
3        1       0       0        0       0       0
4        0       1       1        1       1       1
5        0       1       1        1       1       0
6        1       1       0        1       0       0
7        1       0       0        0       1       0
8        0       1       0        0       0       1
9        0       0       0        0       1       0
10       1       1       1        1       1       1
Now extract the unique codes as follows:
# Strip the two-character "NB" prefix and the one-character suffix from each
# column name, leaving the code in between. Every code shows up once per
# suffix here, so wrap the result in unique() if only distinct codes are
# wanted.
code1 <- local({
  col_names <- names(df2)
  substr(col_names, start = 3, stop = nchar(col_names) - 1)
})
