Start-up.
Collect all uniglomerular PN’s
upns=fc_gene_name(subset(annotation,annotation_class=='NeuronSubType' & grepl('uPN',text))$neuron_idid)
#Keep only the ones in good_images
upns=intersect(upns, good_images)
Remove 2 VAsomething and 1 VP? neurons.
#dput(fc_gene_name(subset(annotation, annotation_class=='ALGlomerulus' & text=='VAsomething')$neuron_idid))
#c("DvGlutMARCM-F078_seg1", "DvGlutMARCM-F1323_seg3")
#dput(fc_gene_name(subset(annotation, annotation_class=='ALGlomerulus' & text=='VP?')$neuron_idid))
"DvGlutMARCM-F1364_seg1"
## [1] "DvGlutMARCM-F1364_seg1"
upns=setdiff(upns, c("DvGlutMARCM-F078_seg1", "DvGlutMARCM-F1323_seg3", "DvGlutMARCM-F1364_seg1"))
length(upns)
## [1] 394
Sort glomeruli into good or bad: the former if we have more than one neuron of that class, the latter if we only have one.
badgloms=names(which(table(glomdf$text)==1))
badgloms
## [1] "DC2" "DC3" "DL4" "DM1" "V" "VL2p" "VM3" "VM6" "VP1"
goodgloms=names(which(table(glomdf$text)>1))
goodgloms
## [1] "DA1" "DA2" "DA4" "DL1" "DL2d" "DL2v" "DL3" "DM5"
## [9] "DM6" "DP1l" "VA1d" "VA1lm" "VA4" "VA7m" "VC2" "VC3l"
## [17] "VC3m" "VC4" "VL2a" "VM1" "VM2" "VM4" "VM5d" "VM5v"
## [25] "VM7" "VP4"
Using the data in upnstophits_norm, which has the query, target, query and target class, if the pair is reciprocal (duplicated), glomerulus and if the neurons are in the same class (TRUE or FALSE, or NA).
There are 58 reciprocal pairs, bringing the total of neuron pairs to 336.
Look at the 3 groups of same.class: TRUE, FALSE and NA.
There are 15 cases of non-matches (same.class==FALSE cases).
There are 319 cases of non-matches (same.class==FALSE cases).
The FALSE and NA category contains both the wrong uPN’s, oligo PNs and 1 non PN neuron.
Changing the labels: for all mismatches, labelling hits (same.class) with False if it’s matched to the wrong uPN; True if it’s matched to the right uPN and Not uPN if it’s matched to an oligo or non uPN neuron. There are 2 cases of NA’s that have to be changed as well.
upnstophits_plot=upnstophits_norm
#from the same.class==FALSE
upnstophits_plot[which(upnstophits_plot$query=="ChaMARCM-F000192_seg001"), 'same.class']="Not uPN"
upnstophits_plot[which(upnstophits_plot$query=="FruMARCM-F002090_seg001"), 'same.class']="Not uPN"
upnstophits_plot[which(upnstophits_plot$query=="GadMARCM-F000475_seg001"), 'same.class']="Not uPN"
upnstophits_plot[which(upnstophits_plot$query=="DvGlutMARCM-F004348_seg001"), 'same.class']="Not uPN"
upnstophits_plot[which(upnstophits_plot$query=="DvGlutMARCM-F002923_seg001"), 'same.class']="Not uPN"
#the 2 NA cases
upnstophits_plot[which(is.na(upnstophits_plot$same.class)), 'same.class']="Not uPN"
So, in the mismatches there are 7 to non uPN neurons
subset(upnstophits_plot, same.class=="Not uPN" & !duplicated(qp) & query.class%in%badgloms, select=c("target", "nscore", "fwd", "rev", "query.class", "target.class"))
## target nscore fwd
## GadMARCM-F000307_seg001 dTdc-2MARCM-F-032-montage_seg1 0.2161048 19702.953
## ChaMARCM-F000192_seg001 FruMARCM-M001100_seg001 0.3739047 4641.386
## ChaMARCM-F001019_seg003 ChaMARCM-F000699_seg001 0.6849646 12937.311
## rev query.class target.class
## GadMARCM-F000307_seg001 -8322.746 V <NA>
## ChaMARCM-F000192_seg001 8579.177 VP1 V+
## ChaMARCM-F001019_seg003 13197.882 VM6 <NA>
subset(upnstophits_plot, same.class=="Not uPN" & !duplicated(qp) & query.class%in%goodgloms, select=c("target", "nscore", "fwd", "rev", "query.class", "target.class"))
## target nscore fwd
## FruMARCM-F002090_seg001 DvGlutMARCM-F002946_seg001 0.6425118 7176.657
## GadMARCM-F000475_seg001 GH146MARCM-M000006_seg001 0.6739751 12775.831
## DvGlutMARCM-F004348_seg001 DvGlutMARCM-F004270_seg001 0.6285948 11381.524
## DvGlutMARCM-F002923_seg001 DvGlutMARCM-F004173_seg002 0.6424462 12735.479
## rev query.class target.class
## FruMARCM-F002090_seg001 6841.549 VM5v VM5v+VC3l
## GadMARCM-F000475_seg001 25233.268 DA1 DA1+DL3
## DvGlutMARCM-F004348_seg001 6789.593 VA1d VA1d+VM5
## DvGlutMARCM-F002923_seg001 13935.854 DL2v DL2v+DM3+
3 of these are cases is which we only have one example of that neuron (in badgloms). In two cases, neurons were matched to oligos that innervate nerby regions. In one case the neuron is a bilateral V glom, which is matched to a big bilateral neuron. In the other 4 cases of the goodlgoms the neurons are matched to oligos that also innervate that glomerulus, even though we have more example of the neurons.
For the mismatch with uPN’s: there are 10 cases. 6 of them are with unique neurons, for 4 we have more examples and NBLAST should have done better.
subset(upnstophits_plot, same.class=="FALSE" & !duplicated(qp) & query.class%in%badgloms, select=c("target", "nscore", "fwd", "rev", "query.class", "target.class"))
## target nscore fwd
## ChaMARCM-F000268_seg001 GadMARCM-F000400_seg001 0.6286771 5434.998
## FruMARCM-F000446_seg001 DvGlutMARCM-F003904_seg001 0.5526937 7164.502
## DvGlutMARCM-F003693_seg001 DvGlutMARCM-F1977_seg1 0.5576889 4796.173
## DvGlutMARCM-F083_seg1 FruMARCM-M002574_seg002 0.4646609 16557.143
## DvGlutMARCM-F249_seg1 DvGlutMARCM-F002620_seg001 0.5025500 5937.758
## FruMARCM-M002622_seg001 DvGlutMARCM-F946_seg1 0.5773130 7029.252
## rev query.class target.class
## ChaMARCM-F000268_seg001 7092.915 DC3 VA1d
## FruMARCM-F000446_seg001 6564.433 VL2p VL2a
## DvGlutMARCM-F003693_seg001 7124.290 DL4 DL1
## DvGlutMARCM-F083_seg1 6219.281 DM1 DM5
## DvGlutMARCM-F249_seg1 8167.676 VM3 VC4
## FruMARCM-M002622_seg001 9128.408 DC2 DA2
k=subset(upnstophits_plot, same.class=="FALSE" & !duplicated(qp) & query.class%in%badgloms, select=c("query.class", "target.class"))
dput(paste(k$query.class, k$target.class, sep=" to "))
## c("DC3 to VA1d", "VL2p to VL2a", "DL4 to DL1", "DM1 to DM5",
## "VM3 to VC4", "DC2 to DA2")
subset(upnstophits_plot, same.class=="FALSE" & !duplicated(qp) & query.class%in%goodgloms, select=c("target", "nscore", "fwd", "rev", "query.class", "target.class"))
## target nscore fwd
## DvGlutMARCM-F162_seg1 DvGlutMARCM-F1506_seg1 0.6986559 8487.149
## DvGlutMARCM-F004350_seg001 FruMARCM-F000992_seg001 0.7142058 11656.162
## DvGlutMARCM-F004508_seg001 FruMARCM-F000992_seg001 0.7027291 9675.747
## DvGlutMARCM-F004300_seg001 DvGlutMARCM-F002605_seg001 0.6808447 4267.234
## rev query.class target.class
## DvGlutMARCM-F162_seg1 8460.642 DL2v DL2d
## DvGlutMARCM-F004350_seg001 9435.821 VM5d VM5v
## DvGlutMARCM-F004508_seg001 9026.771 VM5d VM5v
## DvGlutMARCM-F004300_seg001 5483.835 DL2v DL2d
In the badglom cases, the neuron was matched to a neuron that innervates a nearby glomerulus: “DC3 to VA1d”, “VL2p to VL2a”, “DL4 to DL1”, “DM1 to DM5”, “VM3 to VC4”, “DC2 to DA2”.
For the goodglom cases, the neuron was matched to a neuron that innervates a nearby glomerulus: DL2v to DL2d and VM5d to VM5v.
Taking into account only neurons in goodgloms: 4 cases of match with non-uPN and 4 matched to the wrong uPN class.
subset(upnstophits_plot, same.class=="Not uPN" & !duplicated(qp) & query.class%in%goodgloms, select=c("target", "nscore", "fwd", "rev", "query.class", "target.class"))
## target nscore fwd
## FruMARCM-F002090_seg001 DvGlutMARCM-F002946_seg001 0.6425118 7176.657
## GadMARCM-F000475_seg001 GH146MARCM-M000006_seg001 0.6739751 12775.831
## DvGlutMARCM-F004348_seg001 DvGlutMARCM-F004270_seg001 0.6285948 11381.524
## DvGlutMARCM-F002923_seg001 DvGlutMARCM-F004173_seg002 0.6424462 12735.479
## rev query.class target.class
## FruMARCM-F002090_seg001 6841.549 VM5v VM5v+VC3l
## GadMARCM-F000475_seg001 25233.268 DA1 DA1+DL3
## DvGlutMARCM-F004348_seg001 6789.593 VA1d VA1d+VM5
## DvGlutMARCM-F002923_seg001 13935.854 DL2v DL2v+DM3+
subset(upnstophits_plot, same.class=="FALSE" & !duplicated(qp) & query.class%in%goodgloms, select=c("target", "nscore", "fwd", "rev", "query.class", "target.class"))
## target nscore fwd
## DvGlutMARCM-F162_seg1 DvGlutMARCM-F1506_seg1 0.6986559 8487.149
## DvGlutMARCM-F004350_seg001 FruMARCM-F000992_seg001 0.7142058 11656.162
## DvGlutMARCM-F004508_seg001 FruMARCM-F000992_seg001 0.7027291 9675.747
## DvGlutMARCM-F004300_seg001 DvGlutMARCM-F002605_seg001 0.6808447 4267.234
## rev query.class target.class
## DvGlutMARCM-F162_seg1 8460.642 DL2v DL2d
## DvGlutMARCM-F004350_seg001 9435.821 VM5d VM5v
## DvGlutMARCM-F004508_seg001 9026.771 VM5d VM5v
## DvGlutMARCM-F004300_seg001 5483.835 DL2v DL2d
Non-uPNs are matches to oligos that inneravte the same glomerulus as the query, while wrong matches are to very near glomerulus (DL2v vs DL2d and VM5v vs VM5d).
Plot the 3 groups: True, False and Not uPN, with fwd.norm and rev.norm
#change labels for plot
upnstophits_plot$same.class=factor(upnstophits_plot$same.class)
levels(upnstophits_plot$same.class)<-c("False", "Not uPN", "True") #modify legend to have 3 cases
par(mar=c(0,2,0,0))
qq=ggplot(data=subset(upnstophits_plot, !duplicated(qp)), aes(x=rev.norm, y=fwd.norm))+ geom_point(aes(colour=same.class), size=5, alpha=0.5) + labs(x="Reverse score", y="Forward score", colour="Same class")
qq+theme(axis.text.x=element_text(size=22), axis.text.y=element_text(size=22), axis.title.x=element_text(size=26), axis.title.y=element_text(size=26), legend.text=element_text(size=26), legend.title=element_text(size=26), legend.position=c(0.87, 0.15))
Get numbers for nearest neighbour interpolation.
Use a remote score matrix restricted to olfactory projection neurons:
pnscoremat <- load_si_data("pnscoremat.rds")
options(flycircuit.scoremat="pnscoremat")
Subset glomdf to include only upnswehave and neurons in classes with more than 3 neurons).
tractglomdf=load_si_data("tractglomdf.rds")
upnsnotdl2=subset(upnstophits_norm, query.class!="DL2v" & query.class!="DL2d")
upnswehave=as.character(upnsnotdl2$query)
length(upnswehave)
## [1] 214
goodgloms=names(which(table(tractglomdf$Tract.Glom)>3))
goodglomdf=subset(tractglomdf,Tract.Glom%in%goodgloms & fc_gene_name(neuron_idid)%in%upnswehave)
#Collect top 3 hits + query
scs=fc_nblast(fc_gene_name(goodglomdf$neuron_idid), fc_gene_name(goodglomdf$neuron_idid), norm="mean")
topnames=sapply(fc_gene_name(goodglomdf$neuron_idid), function(x) names(sort(scs[,x], dec=T)[1:4]))
fc_tractglom<-function(x){
idids=fc_idid(x)
gloms<-subset(tractglomdf, neuron_idid%in%idids)$Tract.Glom
gloms
}
x=apply(topnames, 2, fc_tractglom)
z=table(apply(x,2,function(y) sum(y[-1]==y[1])))
cumsum(rev(z)) # n for all 3 matches, at least 2 matches and at least one in top 3
## 3 2 1 0
## 178 180 185 187
cumsum(rev(z))/nrow(goodglomdf) *100 # %
## 3 2 1 0
## 95.18717 96.25668 98.93048 100.00000
table(apply(x,2,function(y) sum(y[2]==y[1]))) #n for when top hit matches query
##
## 0 1
## 3 184
# the 3 mismatches are
apply(topnames[,which(x[1,]!=x[2,])], 2, fc_tractglom)
## DvGlutMARCM-F003634_seg001 DvGlutMARCM-F002571_seg001
## [1,] "mVM5v" "mVM5v"
## [2,] "mVM5d" "mVM5d"
## [3,] "mVM5d" "mVM5d"
## [4,] "mVM5v" "mVM5d"
## DvGlutMARCM-F002629_seg003
## [1,] "mVM5d"
## [2,] "mVM2"
## [3,] "mVM2"
## [4,] "mVM2"
Note: the picket plot is now not shown on the figure.
Cluster the upns by hierarchical clustering. Remove the DL2 neurons prior to this (DL2d and DL2v) because they are disproportionately represented in the dataset and could bias the results.
Dendogram showing the neuron class. k=35 separates neurons into classes, i.e., each group innervates different glomeruli.
pnhc=hclustfc(upnswehave)
## Warning: Dropping 27 target neurons.
## Warning: Dropping 27 query neurons.
pnhc_h=pnhc
pnhc_h$height=sqrt(pnhc_h$height)
pnhc_hd=colour_clusters(pnhc_h, h=height_for_ngroups(pnhc_h, k=35), groupLabels=as.integer)
labels(pnhc_hd)<-upnsnotdl2[labels(pnhc_hd),'query.class']
par(mar=c(2,3,0,0))
par(cex=0.5)
plot(pnhc_hd, lwd=0.5, cex.axis=1.5)
abline(h=height_for_ngroups(pnhc_h, k=35), lty='dashed')
height_for_ngroups(pnhc_h, k=35)
## [1] 0.669249
Make a split plot showing the dendrogram and the picketplot, which shows the glomeruli, neuroblast origin and tract.
layout(matrix(1:2,ncol=1,nrow=2))
par(mar=c(1.7, 11, 1, 0))
par(cex=0.5)
plot(pnhc_hd,lwd=.5, cex.axis=1.5)
abline(h=height_for_ngroups(pnhc_h, k=35), lty='dashed')
picketPlot(picketplotdfshort[labels(pnhc_h),],grp=slice(pnhc_hd,h=height_for_ngroups(pnhc_h, k=35)),grpcol=rainbow(n=35, alpha=0.3))
Plot all upns used in the dendrogram coloured by dendrogram group.
nopen3d()
## glX
## 1
op=structure(list(FOV = 0, userMatrix = structure(c(1, 0, 0, 0,
0, -1, 0, 0, 0, 0, -1, 0, 1.29680967820684, 2.26185450224706,
0, 1), .Dim = c(4L, 4L)), scale = c(1, 1, 1), zoom = 0.547308325767517), .Names = c("FOV",
"userMatrix", "scale", "zoom"))
par3d(op)
for(i in 1:35) {
plot3d(subset(pnhc_h, k=35, groups=i), col=rainbow(n=35, alpha=0.3)[i], soma=T, skipRedraw=TRUE)
}
Plot neurons of the first 5 dendrogram groups. One plot of each group plus one plot with all.
Group 1:
clear3d()
plot3d(FCWB)
opgroup=structure(list(FOV = 0, userMatrix = structure(c(1, 0, 0, 0,
0, -1, 0, 0, 0, 0, -1, 0, -76.1611144774601, -40.8901171571298,
0, 1), .Dim = c(4L, 4L)), scale = c(1, 1, 1), zoom = 0.2165886759758), .Names = c("FOV",
"userMatrix", "scale", "zoom"))
par3d(opgroup)
plot3d(subset(pnhc_h, h=height_for_ngroups(pnhc_h, k=35), groups=1), col=rainbow(n=35, alpha=0.3)[1], soma=T)
Group 2:
clear3d()
plot3d(FCWB)
par3d(opgroup)
plot3d(subset(pnhc_h, h=height_for_ngroups(pnhc_h, k=35), groups=2), col=rainbow(n=35, alpha=0.3)[2], soma=T)
Group 3:
clear3d()
plot3d(FCWB)
par3d(opgroup)
plot3d(subset(pnhc_h, h=height_for_ngroups(pnhc_h, k=35), groups=3), col=rainbow(n=35, alpha=0.3)[3], soma=T)
Group 4:
clear3d()
plot3d(FCWB)
par3d(opgroup)
plot3d(subset(pnhc_h, h=height_for_ngroups(pnhc_h, k=35), groups=4), col=rainbow(n=35, alpha=0.3)[4], soma=T)
Group 5:
clear3d()
plot3d(FCWB)
par3d(opgroup)
plot3d(subset(pnhc_h, h=height_for_ngroups(pnhc_h, k=35), groups=5), col=rainbow(n=35, alpha=0.3)[5], soma=T)
Plot the 5 groups.
clear3d()
plot3d(FCWB)
par3d(opgroup)
plot3d(FCWBNP.surf, materials="AL_L", alpha=0.1, col='green')
plot3d(FCWBNP.surf, materials="LH_L", alpha=0.1, col='blue')
for(i in 1:5) {
plot3d(subset(pnhc_h, k=35, groups=i), col=rainbow(n=35, alpha=0.3)[i], soma=T)
}