50. HPC Cloud - Web Service
http://seqware.github.io/docs/
LIMS
Object
Storage
High
Speed
File
Transfer
IaaS
HPC
Private Cloud
OpenStack
Job Schedule
Bioinformatics
Linux
Bioinformatics
Bioinformatics
Hadoop and
Database
57. # Compute the Ti/Tv ratio for BRCA1.
# Ti/Tv for the BRCA1 window: label every SNP with its "ref->alt" base change,
# total the transition and transversion counts, then divide one by the other.
SELECT
  transitions,
  transversions,
  transitions/transversions AS titv
FROM (
  # Fold the per-mutation counts into the two class totals.
  SELECT
    # Transitions: A<->G and C<->T.
    SUM(CASE
        WHEN mutation IN ('A->G', 'G->A', 'C->T', 'T->C') THEN INTEGER(num_snps)
        ELSE INTEGER(0)
      END) AS transitions,
    # Transversions: the eight remaining single-base changes.
    SUM(CASE
        WHEN mutation IN ('A->C', 'C->A', 'G->T', 'T->G',
                          'A->T', 'T->A', 'C->G', 'G->C') THEN INTEGER(num_snps)
        ELSE INTEGER(0)
      END) AS transversions
  FROM (
    # One row per distinct "ref->alt" string among SNPs on contig 17 with
    # position between 41196312 and 41277500 (BETWEEN includes both ends).
    SELECT
      CONCAT(reference_bases, CONCAT(STRING('->'), alternate_bases)) AS mutation,
      COUNT(alternate_bases) AS num_snps
    FROM
      [google.com:biggene:1000genomes.variants1kG]
    WHERE
      vt = 'SNP'
      AND contig = '17'
      AND position BETWEEN 41196312 AND 41277500
    GROUP BY
      mutation
    # The enclosing SELECT sums over every row, so this ordering does not
    # change the totals.
    ORDER BY
      mutation));
Google BigQuery with plot
# Submit the query text held in `sql` via query_exec() and keep what it
# returns in `result`; `billing_project` is passed as the billing argument.
result <- query_exec(
  query   = sql,
  project = "google.com:biggene",
  dataset = "1000genomes",
  billing = billing_project
)
Ti/Tv ratio in BRCA1
58. # Count the variation for each sample including phenotypic traits
# Per-sample SNP tally, split into four allele-frequency (af) bands and
# reported alongside the sample's gender, population and super_population
# taken from the joined sample_info table.
SELECT
  samples.genotype.sample_id AS sample_id,
  gender,
  population,
  super_population,
  COUNT(samples.genotype.sample_id) AS num_variants_for_sample,
  # af >= 0.05
  SUM(CASE
      WHEN samples.af >= 0.05 THEN INTEGER(1)
      ELSE INTEGER(0)
    END) AS common_variant,
  # 0.005 < af < 0.05
  SUM(CASE
      WHEN samples.af > 0.005 AND samples.af < 0.05 THEN INTEGER(1)
      ELSE INTEGER(0)
    END) AS middle_variant,
  # 0.001 < af <= 0.005
  SUM(CASE
      WHEN samples.af > 0.001 AND samples.af <= 0.005 THEN INTEGER(1)
      ELSE INTEGER(0)
    END) AS rare_variant,
  # af <= 0.001
  SUM(CASE
      WHEN samples.af <= 0.001 THEN INTEGER(1)
      ELSE INTEGER(0)
    END) AS very_rare_variant
FROM
  # FLATTEN on the genotype field so each genotype entry can be matched to
  # a sample_info row by sample id.
  FLATTEN([google.com:biggene:1000genomes.variants1kG], genotype) AS samples
JOIN
  [google.com:biggene:1000genomes.sample_info] AS p
ON
  samples.genotype.sample_id = p.sample
WHERE
  samples.vt = 'SNP'
  # Keep a call only when at least one allele index is above 0
  # (presumably index 0 is the reference allele -- confirm against the schema).
  AND (samples.genotype.first_allele > 0
    OR samples.genotype.second_allele > 0)
GROUP BY
  sample_id,
  gender,
  population,
  super_population
ORDER BY
  sample_id;
Google BigQuery with R
# Box plot of the common_variant column of `result`: one box per population
# on the x axis, filled by super_population.
ggplot(
  data = result,
  mapping = aes(x = population, y = common_variant, fill = super_population)
) +
  geom_boxplot() +
  ylab("Count of common variants per sample") +
  ggtitle("Common Variants (Minimum Allelic Frequency 5%)")
Variant type