Explore MARIMBA-Annotated Cell Atlas
Thu Nov 5 02:05:27 UTC 2020
Download Data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import requests
from tqdm import tnrange, tqdm_notebook
def download_file(doi,ext):
    """Download the first media file registered for a DataCite DOI.

    The DOI's media listing is fetched from the DataCite REST API, the URL of
    the first media entry is streamed to the current working directory, and a
    notebook progress bar tracks the number of bytes written.

    Parameters
    ----------
    doi : str
        DOI to look up, e.g. ``'10.22002/D1.1833'``.
    ext : str
        Extension appended to the last path component of the DOI to form the
        local file name, e.g. ``'.gz'``.

    Returns
    -------
    str
        The local file name (last DOI component + ``ext``). The name is
        returned even when the download was refused or skipped; in that case
        a message is printed and no file is written.
    """
    # Local import: the notebook-aware tqdm replaces the deprecated `tnrange`.
    from tqdm.notebook import tqdm as progress_bar

    chunk_size = 1024

    url = 'https://api.datacite.org/dois/'+doi+'/media'
    media = requests.get(url).json()
    file_url = media['data'][0]['attributes']['url']

    #Set file name
    fname = doi.split('/')[-1]+ext

    #Download file with progress bar
    # The context manager releases the streamed connection on every exit path.
    with requests.get(file_url,stream=True) as r:
        if r.status_code == 403:
            print("File Unavailable")
            # Stop here so the error page is never saved as the data file.
            return fname
        if not r.ok or 'content-length' not in r.headers:
            print("Did not get file")
            return fname

        total_length = int(r.headers['content-length'])
        # Count real bytes so the bar's total, unit and progress all agree.
        with open(fname, 'wb') as f, \
                progress_bar(total=total_length, unit="B", unit_scale=True) as pbar:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
                    pbar.update(len(chunk))
    return fname
# Fetch the Marimba filtered/clustered adata archive; it is saved as 'D1.1833.gz'
download_file(doi='10.22002/D1.1833', ext='.gz')
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:18: TqdmDeprecationWarning: Please use `tqdm.notebook.trange` instead of `tqdm.tnrange`
HBox(children=(FloatProgress(value=0.0, max=520896.0), HTML(value='')))
'D1.1833.gz'
# Install the third-party packages imported further down (anndata, scanpy).
# The leading "!" is an IPython shell escape, so these lines only run inside a
# notebook/IPython session; --quiet trims pip's console output.
!pip install --quiet anndata
!pip install --quiet scanpy
|████████████████████████████████| 122kB 13.7MB/s
|████████████████████████████████| 7.7MB 10.7MB/s
|████████████████████████████████| 51kB 5.7MB/s
|████████████████████████████████| 71kB 7.1MB/s
Building wheel for sinfo (setup.py) ... done
Import Packages
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#Import Packages
import random
import pandas as pd
import anndata
import scanpy as sc
import numpy as np
import scipy.sparse
import warnings
# Silence all Python warnings from this point on. NOTE(review): this also
# hides deprecation notices from the plotting libraries used below.
warnings.filterwarnings('ignore')
#import scrublet as scr
import matplotlib.pyplot as plt
# IPython magic (notebook only): draw matplotlib figures inline in cell output.
%matplotlib inline
# Use 125 dpi for scanpy figures.
sc.set_figure_params(dpi=125)
import seaborn as sns
# Apply seaborn's "whitegrid" style.
sns.set(style="whitegrid")
Read in Data for Plotting and Embedding Visualizations
#Read in data
import gzip
import os
import shutil

# download_file() saves the archive as 'D1.1833.gz', while the reader below
# opens 'D1.1833'. Decompress once if the plain file is not already present
# (e.g. because it was gunzipped by hand in an earlier step).
if not os.path.exists('D1.1833') and os.path.exists('D1.1833.gz'):
    with gzip.open('D1.1833.gz', 'rb') as src, open('D1.1833', 'wb') as dst:
        shutil.copyfileobj(src, dst)

# read_h5ad is the explicit name of the reader that the legacy alias
# `anndata.read` pointed to.
jelly_adata = anndata.read_h5ad('D1.1833')
jelly_adata
AnnData object with n_obs × n_vars = 13673 × 9609
    obs: 'batch', 'orgID', 'fed', 'starved', 'cellRanger_louvain', 'n_counts', 'n_countslog'
    var: 'n_counts', 'mean', 'std'
    uns: 'cellRanger_louvain_colors', 'cellRanger_louvain_sizes', 'neighbors', 'paga', 'pca', 'rank_genes_groups', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'
#Add dendrogram
# Hierarchically cluster the 'cellRanger_louvain' groups (Ward linkage); the
# heatmap below is drawn with dendrogram=True and needs this result in .uns.
sc.tl.dendrogram(jelly_adata,'cellRanger_louvain',linkage_method='ward')

# The scanpy build this notebook was written against stored the result under
# a key built from the list repr of the groupby argument. Copy it to the plain
# key only when that legacy key exists, so that a scanpy version which already
# writes 'dendrogram_cellRanger_louvain' does not fail with a KeyError here.
_legacy_dendrogram_key = "dendrogram_['cellRanger_louvain']"
if _legacy_dendrogram_key in jelly_adata.uns:
    jelly_adata.uns['dendrogram_cellRanger_louvain'] = jelly_adata.uns[_legacy_dendrogram_key]
Plot genes of interest on heatmap
# Store the cluster labels as a pandas Categorical before grouping by them.
jelly_adata.obs['cellRanger_louvain'] = pd.Categorical(
    jelly_adata.obs['cellRanger_louvain']
)

# Example genes list (reused by the UMAP plot further down)
genes = [
    'XLOC_014365',
    'XLOC_039658',
    'XLOC_026179',
    'XLOC_037659',
    'XLOC_014942',
    'XLOC_009450',
    'XLOC_037610',
    'XLOC_035947',
    'XLOC_045755',
    'XLOC_043531',
    'XLOC_039390',
    'XLOC_040602',
    'XLOC_034872',
    'XLOC_021040',
    'XLOC_044379',
    'XLOC_003367',
    'XLOC_010274',
    'XLOC_033853',
    'XLOC_036636',
    'XLOC_021850',
]

# Heatmap of the selected genes, grouped by cluster, with the dendrogram and
# per-gene labels shown; standard_scale='var' scales each gene separately.
sc.pl.heatmap(
    jelly_adata,
    genes,
    groupby='cellRanger_louvain',
    dendrogram=True,
    show_gene_labels=True,
    swap_axes=False,
    figsize=(10, 30),
    cmap='PuBuGn',
    standard_scale='var',
)
Plot genes of interest on cell atlas embedding
# Colour the stored UMAP embedding by each entry of `genes`.
sc.pl.umap(
    jelly_adata,
    color=genes,
)