ݺߣ

SlideShare a Scribd company logo
Parallel Computing in R
      2010/05/09 Tsukuba.R#7
            id:mickey24
id: mickey24 (@mickey24)
Tsukuba.R
Tsukuba.R#4
   ?R           Brainf*ck
     ?   Brainf*ck

> hello <- "+++++++++[>++++++++>++++++++++
+>+++++<<<-]>.>++.+++++++..++
+.>-.------------.<++++++++.--------.++
+.------.--------.>+."
> brainfxxk(hello)
[1] "Hello, world!"

                        http://www.slideshare.net/mickey24/rbrainfck-1085191
Tsukuba.R#5
? Animation with R
 ? library(animation)




                        http://d.hatena.ne.jp/mickey24/20090614
Tsukuba.R#6
? Extend R with C!!!
 ?   C   R             (C             OpenCV                 )




                 http://d.hatena.ne.jp/mickey24/20091123/r_de_extension
Parallel Computing in R
Tsukuba.R#7
?
?n   snow

?
n
?

    ?   CPU


          !"#!                 !"#!


                               !"#!
                 !!!!




                        !!!!




                               !!!!
                               !"#!
1CPU
sapply(1:8, function(x){ x^2 })



                  (function(x){x^2})(1)
                  (function(x){x^2})(2)
                  (function(x){x^2})(3)
                  (function(x){x^2})(4)
           !"#!   (function(x){x^2})(5)
                  (function(x){x^2})(6)
                  (function(x){x^2})(7)
                  (function(x){x^2})(8)




 [1]   1   4   9 16 25 36 49 64
4CPU
               sapply(1:8, function(x){ x^2 })




(function(x){x^2})(1)   (function(x){x^2})(3)   (function(x){x^2})(5)   (function(x){x^2})(7)
(function(x){x^2})(2)   (function(x){x^2})(4)   (function(x){x^2})(6)   (function(x){x^2})(8)


        !"#!                    !"#!                    !"#!                   !"#!




                   [1]      1      4     9 16 25 36 49 64
?
    ?
?
    ?      1CPU   1
     100CPU
     10
R
?   snow
    ?   R

    ?   http://cran.r-project.org/web/packages/snow/index.html

?   R       apply


?
    ?   Socket PVM MPI
n
snow
snow
?   2CPU


    ?
    ?      CPU
?   snow



> install.packages("snow")


?
    ?
matprod.R
?   1000


n <- 1000
A <- matrix(rnorm(n^2), n)
B <- matrix(rnorm(n^2), n)

C <- A %*% B


?
clmatprod.R
library(snow)

n <- 1000
A <- matrix(rnorm(n^2), n)
B <- matrix(rnorm(n^2), n)

cpu <- 2
hosts <- rep("localhost", cpu)

cl <- makeCluster(hosts, type="SOCK")
C <- parMM(cl, A, B) # C <- A %*% B
stopCluster(cl)
?
> source("clmatprod.R")


?
> head(C)
?
    ?   parMM(cl, A, B)


?              apply
    ?   parApply(cl, X, MARGIN, fun, ...)
    ?   parLapply(cl, X, fun, ...)
    ?   parSapply(cl, X, fun, ..., simplify=TRUE, USE.NAMES=TRUE)
    ?   etc.


?
    ?   clusterMap(cl, fun, ..., MoreArgs = NULL, RECYCLE = TRUE)
    ?   clusterCall(cl, fun, ...)
    ?   etc.


?              help       (?parApply,   ?clusterMap   )
snow
?
 ? snow
     ?    mCPU n

     ?    O(n^3)
                                          #!
                $!


!"#!     !"#!   !"#!   !!!!   !"#!
                                     #!
                                          !!   "!
?   1CPU(        )

        system.time(A %*% B)



?   mCPU (   )

    system.time(parMM(cl, A, B))
?                 @DBCLS
    ?
        ?   Sun Grid Engine OpenMPI

    ?
@DBCLS
        1          8
CPU         8CPU   64CPU
            16GB   128GB


      !"#$!        !"#$!


      !"#$!        !"#$!


      !"#$!        !"#$!


      !"#$!        !"#$!
Parallel Computing in R
1000
   (   )

3.00

2.50

2.00

1.50

1.00
                                     Faster!
0.50

  0
           1      4        8   16   CPU


               4CPU 1.14
3000
    (   )

40.00



30.00



20.00



10.00                                 Faster!

   0
            1      4        8   16   CPU


                8CPU 2.70
Parallel Computing in R
?   snow


?                                            C


    ?         C R                      (40       50 )
              www
        http://d.hatena.ne.jp/syou6162/20090117/1232120983
?   snow

    ?   snow Simplified
        http://www.sfu.ca/~sblay/R/snow.html
    ?   RjpWiki - R
        http://www.okada.jp.org/RWiki/?R%A4%C7%CA%C2%CE%F3%B7%D7%BB%BB
    ?   RjpWiki - L.Tierney    snow
        http://www.okada.jp.org/RWiki/?L.%20Tierney%BB%E1%A4%CEsnow%A5%D1%A5%C3%A5%B1%A1%BC%A5%B8%A4%C7%A5%AF%A5%E9%A5%B9%A5%BF%B7%D7%BB%BB%A4%F2%B9%D4%A4%A6#scce80a1
?   Rmpi + snow + Sun Grid Engine
    ?   Scheduled Parallel Computing with R: R + Rmpi + OpenMPI + Sun Grid Engine
        (SGE)
        http://blog.nguyenvq.com/2010/01/20/scheduled-parallel-computing-with-r-r-rmpi-openmpi-sun-grid-engine-sge/
Parallel Computing in R
Parallel Computing in R
R+
Sun Grid Engine +
  Open MPI +
     snow
?                    @DBCLS


    ?   Sun Grid Engine

    ?   OpenMPI
        ?   mpirun        R

    ?   Rmpi    snow OpenMPI


?
    ?
?
       ?   Rmpi   OpenMPI

       ?   snow    Rmpi

> install.packages("Rmpi", configure.args=
"/path/to/mpidir")
> install.packages("snow")
gmatprod.R
library(Rmpi)
library(snow)

n <- 1000
A <- matrix(rnorm(n^2), n)
B <- matrix(rnorm(n^2), n)

cl <- makeCluster()    #
C <- parMM(cl, A, B)   # C <- A %*% B
stopCluster(cl)
gmatprod.sh
#!/bin/bash

#$ -S /bin/bash    #               /bin/bash
#$ -j y            # stdout   stderr   -o
#$ -o gmatprod.log # stdout   stderr
#$ -pe openmpi 8   #


export PATH=[path to R & snow_install_dir & mpirun]:$PATH
export LD_LIBRARY_PATH=/path/to/mpidir/lib:$LD_LIBRARY_PATH

MPIRUN_PATH=/path/to/mpirun
MPIRUN_OPTS="-np ${NSLOTS} -machinefile ${TMPDIR}/machines"

RSCRIPT=gmatprod.R
RPATH=/path/to/snow_install_dir/RMPISNOW
CMD="${RPATH} CMD BATCH --no-save ${RSCRIPT}"

${MPIRUN_PATH} ${MPIRUN_OPTS} ${CMD}
? qsub      Grid Engine


$ qsub gmatprod.sh
?
    ?   Scheduled Parallel Computing with R: R + Rmpi + OpenMPI +
        Sun Grid Engine (SGE)
        http://blog.nguyenvq.com/2010/01/20/scheduled-parallel-computing-with-r-r-rmpi-openmpi-sun-grid-engine-sge/


? Sun Grid Engine                  MPI               R

More Related Content

Parallel Computing in R

  • 1. Parallel Computing in R 2010/05/09 Tsukuba.R#7 id:mickey24
  • 4. Tsukuba.R#4 ?R Brainf*ck ? Brainf*ck > hello <- "+++++++++[>++++++++>++++++++++ +>+++++<<<-]>.>++.+++++++..++ +.>-.------------.<++++++++.--------.++ +.------.--------.>+." > brainfxxk(hello) [1] "Hello, world!" http://www.slideshare.net/mickey24/rbrainfck-1085191
  • 5. Tsukuba.R#5 ? Animation with R ? library(animation) http://d.hatena.ne.jp/mickey24/20090614
  • 6. Tsukuba.R#6 ? Extend R with C!!! ? C R (C OpenCV ) http://d.hatena.ne.jp/mickey24/20091123/r_de_extension
  • 9. ? ?n snow ?
  • 10. n
  • 11. ? ? CPU !"#! !"#! !"#! !!!! !!!! !!!! !"#!
  • 12. 1CPU sapply(1:8, function(x){ x^2 }) (function(x){x^2})(1) (function(x){x^2})(2) (function(x){x^2})(3) (function(x){x^2})(4) !"#! (function(x){x^2})(5) (function(x){x^2})(6) (function(x){x^2})(7) (function(x){x^2})(8) [1] 1 4 9 16 25 36 49 64
  • 13. 4CPU sapply(1:8, function(x){ x^2 }) (function(x){x^2})(1) (function(x){x^2})(3) (function(x){x^2})(5) (function(x){x^2})(7) (function(x){x^2})(2) (function(x){x^2})(4) (function(x){x^2})(6) (function(x){x^2})(8) !"#! !"#! !"#! !"#! [1] 1 4 9 16 25 36 49 64
  • 14. ? ? ? ? 1CPU 1 100CPU 10
  • 15. R ? snow ? R ? http://cran.r-project.org/web/packages/snow/index.html ? R apply ? ? Socket PVM MPI
  • 17. snow ? 2CPU ? ? CPU
  • 18. ? snow > install.packages("snow") ? ?
  • 19. matprod.R ? 1000 n <- 1000 A <- matrix(rnorm(n^2), n) B <- matrix(rnorm(n^2), n) C <- A %*% B ?
  • 20. clmatprod.R library(snow) n <- 1000 A <- matrix(rnorm(n^2), n) B <- matrix(rnorm(n^2), n) cpu <- 2 hosts <- rep("localhost", cpu) cl <- makeCluster(hosts, type="SOCK") C <- parMM(cl, A, B) # C <- A %*% B stopCluster(cl)
  • 22. ? ? parMM(cl, A, B) ? apply ? parApply(cl, X, MARGIN, fun, ...) ? parLapply(cl, X, fun, ...) ? parSapply(cl, X, fun, ..., simplify=TRUE, USE.NAMES=TRUE) ? etc. ? ? clusterMap(cl, fun, ..., MoreArgs = NULL, RECYCLE = TRUE) ? clusterCall(cl, fun, ...) ? etc. ? help (?parApply, ?clusterMap )
  • 23. snow
  • 24. ? ? snow ? mCPU n ? O(n^3) #! $! !"#! !"#! !"#! !!!! !"#! #! !! "!
  • 25. ? 1CPU( ) system.time(A %*% B) ? mCPU ( ) system.time(parMM(cl, A, B))
  • 26. ? @DBCLS ? ? Sun Grid Engine OpenMPI ?
  • 27. @DBCLS 1 8 CPU 8CPU 64CPU 16GB 128GB !"#$! !"#$! !"#$! !"#$! !"#$! !"#$! !"#$! !"#$!
  • 29. 1000 ( ) 3.00 2.50 2.00 1.50 1.00 Faster! 0.50 0 1 4 8 16 CPU 4CPU 1.14
  • 30. 3000 ( ) 40.00 30.00 20.00 10.00 Faster! 0 1 4 8 16 CPU 8CPU 2.70
  • 32. ? snow ? C ? C R (40 50 ) www http://d.hatena.ne.jp/syou6162/20090117/1232120983
  • 33. ? snow ? snow Simplified http://www.sfu.ca/~sblay/R/snow.html ? RjpWiki - R http://www.okada.jp.org/RWiki/?R%A4%C7%CA%C2%CE%F3%B7%D7%BB%BB ? RjpWiki - L.Tierney snow http://www.okada.jp.org/RWiki/?L.%20Tierney%BB%E1%A4%CEsnow%A5%D1%A5%C3%A5%B1%A1%BC%A5%B8%A4%C7%A5%AF%A5%E9%A5%B9%A5%BF%B7%D7%BB%BB%A4%F2%B9%D4%A4%A6#scce80a1 ? Rmpi + snow + Sun Grid Engine ? Scheduled Parallel Computing with R: R + Rmpi + OpenMPI + Sun Grid Engine (SGE) http://blog.nguyenvq.com/2010/01/20/scheduled-parallel-computing-with-r-r-rmpi-openmpi-sun-grid-engine-sge/
  • 36. R+ Sun Grid Engine + Open MPI + snow
  • 37. ? @DBCLS ? Sun Grid Engine ? OpenMPI ? mpirun R ? Rmpi snow OpenMPI ? ?
  • 38. ? ? Rmpi OpenMPI ? snow Rmpi > install.packages("Rmpi", configure.args= "/path/to/mpidir") > install.packages("snow")
  • 39. gmatprod.R library(Rmpi) library(snow) n <- 1000 A <- matrix(rnorm(n^2), n) B <- matrix(rnorm(n^2), n) cl <- makeCluster() # C <- parMM(cl, A, B) # C <- A %*% B stopCluster(cl)
  • 40. gmatprod.sh #!/bin/bash #$ -S /bin/bash # /bin/bash #$ -j y # stdout stderr -o #$ -o gmatprod.log # stdout stderr #$ -pe openmpi 8 # export PATH=[path to R & snow_install_dir & mpirun]:$PATH export LD_LIBRARY_PATH=/path/to/mpidir/lib:$LD_LIBRARY_PATH MPIRUN_PATH=/path/to/mpirun MPIRUN_OPTS="-np ${NSLOTS} -machinefile ${TMPDIR}/machines" RSCRIPT=gmatprod.R RPATH=/path/to/snow_install_dir/RMPISNOW CMD="${RPATH} CMD BATCH --no-save ${RSCRIPT}" ${MPIRUN_PATH} ${MPIRUN_OPTS} ${CMD}
  • 41. ? qsub Grid Engine $ qsub gmatprod.sh
  • 42. ? ? Scheduled Parallel Computing with R: R + Rmpi + OpenMPI + Sun Grid Engine (SGE) http://blog.nguyenvq.com/2010/01/20/scheduled-parallel-computing-with-r-r-rmpi-openmpi-sun-grid-engine-sge/ ? Sun Grid Engine MPI R