regex - Detect partial string matches in R -
I am trying to count the number of transactions that start with ak, contain ak again within the transaction, and don't end in ak.
examples:
exclude: example: ak->se (no ak in between)
exclude: ak->gg->se->ll : does not include ak again within the transaction
include: example: ak->se->ak->gg
sample data:
# Sample data from the question: `id` labels each row and `mode` holds the
# "->"-separated transaction string.
f <- data.frame(
  id = c("a", "a", "a", "a", "c", "c", "d", "d", "e"),
  mode = c(
    "ak->se",
    "se->ak->gg, bishan->k",
    "ak->se",
    "se->gr->gg, bishan->ak",
    "ak->se",
    "se->gr->gg, bishan->ak",
    "ak->se",
    "se->gr->gg, bishan->ak",
    "se->ak->df, hg->pp->sk"
  )
)
I need to deal with a large amount of data, so optimization is crucial.
thanks in advance.
edited
# Edited sample data: rows 6, 7 and 9 start with ak and contain a second ak;
# row 6 additionally ends in ak.
f <- data.frame(
  id = c("a", "a", "a", "a", "c", "c", "d", "d", "e"),
  mode = c(
    "ak->se",
    "se->ak->gg, bishan->k",
    "ak->se",
    "se->gr->gg, bishan->ak",
    "ak->se",
    "ak->ak->gg, bishan->ak",
    "ak->se->ak->gg",
    "se->gr->gg, bishan->ak",
    "ak->ak->df, hg->pp->sk"
  )
)
using regular expression
# Sample data: row 7 is the only transaction that starts with ak, passes
# through ak again and does not end in ak.
f <- data.frame(
  id = c("a", "a", "a", "a", "c", "c", "d", "d", "e"),
  mode = c(
    "ak->se",
    "se->ak->gg, bishan->k",
    "ak->se",
    "se->gr->gg, bishan->ak",
    "ak->se",
    "se->gr->gg, bishan->ak",
    "ak->se->ak->gg",
    "se->gr->gg, bishan->ak",
    "se->ak->df, hg->pp->sk"
  )
)

# Flag transactions that start with ak, revisit ak, and do not end in ak.
#
# mode: character vector (or factor) of "->"-separated transaction strings.
# Returns a logical vector the same length as `mode`; NA inputs give FALSE.
#
# Both steps are vectorised, so no per-string R function is called.
ak_revisit_regex <- function(mode) {
  # Factor columns (the data.frame default before R 4.0) must be converted
  # because endsWith() only accepts character vectors.
  mode <- as.character(mode)
  # "(.*->)?" forces the second "ak" to sit directly after a "->" separator,
  # so a longer token such as "bak" is not mistaken for "ak".
  revisits_ak <- grepl("^ak->(.*->)?ak->", mode, perl = TRUE)
  # The question also requires that the transaction does not finish on ak.
  revisits_ak & !endsWith(mode, "->ak")
}

selection <- ak_revisit_regex(f$mode)
f$mode[selection]
f$id[selection]
Using strsplit with an apply-style function over the tokens (might be a bit slower if there are a lot of strings)
# Sample data: row 7 is the only transaction that starts with ak, passes
# through ak again and does not end in ak.
f <- data.frame(
  id = c("a", "a", "a", "a", "c", "c", "d", "d", "e"),
  mode = c(
    "ak->se",
    "se->ak->gg, bishan->k",
    "ak->se",
    "se->gr->gg, bishan->ak",
    "ak->se",
    "se->gr->gg, bishan->ak",
    "ak->se->ak->gg",
    "se->gr->gg, bishan->ak",
    "se->ak->df, hg->pp->sk"
  )
)

# Flag transactions whose first token is ak, whose last token is not ak and
# that contain ak more than once.
#
# mode: character vector (or factor) of "->"-separated transaction strings.
# Returns a logical vector the same length as `mode`; empty strings and NA
# inputs give FALSE.
ak_revisit_tokens <- function(mode) {
  # as.character() because strsplit() rejects factors; fixed = TRUE because
  # "->" is a literal separator, not a regular expression.
  token_lists <- strsplit(as.character(mode), split = "->", fixed = TRUE)
  # vapply() guarantees a logical vector even for zero-length input.
  vapply(
    token_lists,
    function(tokens) {
      n_tokens <- length(tokens)
      # Guard empty and NA splits first so every comparison below is a
      # well-defined scalar.
      n_tokens > 0L &&
        !is.na(tokens[1L]) &&
        tokens[1L] == "ak" &&
        tokens[n_tokens] != "ak" &&
        sum(tokens == "ak") > 1L
    },
    logical(1)
  )
}

selection <- ak_revisit_tokens(f$mode)
f$mode[selection]
f$id[selection]
Comments
Post a Comment