Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Assem.Maratova
courses-binf305-Assem
Commits
b5dbabfb
Commit
b5dbabfb
authored
Sep 08, 2019
by
jeq008-uib
Browse files
assignment for week 36
parent
ee995b5a
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
assignment_week_36/network_tf_gene.txt
0 → 100644
View file @
b5dbabfb
This diff is collapsed.
Click to expand it.
assignment_week_36/week_36.ipynb
0 → 100644
View file @
b5dbabfb
%% Cell type:code id: tags:
```
python
import
pandas
as
pd
import
numpy
as
np
from
scipy.sparse
import
lil_matrix
```
%% Cell type:code id: tags:
```
python
# Read columns 0, 1, 2 and 4 of the network file, skipping its first 34 lines.
raw_columns = [0, 1, 2, 4]
df = pd.read_table(
    'network_tf_gene.txt',
    header=None,
    skiprows=34,
    usecols=raw_columns,
)

# Standardize the data: every kept column becomes a lower-case string so that
# later comparisons do not depend on the capitalization used in the file.
df[raw_columns] = df[raw_columns].apply(
    lambda column: column.astype(str).str.lower()
)

# Give the columns readable names.
df = df.rename(columns=dict(zip(raw_columns, ['tf', 'gene', 'effect', 'power'])))

# Keep only rows whose effect is '+' or '-' and whose power is 'strong'.
has_signed_effect = df['effect'].isin(['+', '-'])
is_strong = df['power'] == 'strong'
filtered = has_signed_effect & is_strong

# Drop duplicate rows that share the same (tf, gene) pair; the first one is kept.
df = df.loc[filtered].drop_duplicates(subset=['tf', 'gene'])
```
%% Cell type:code id: tags:
```
python
# Collect every name that occurs in the 'tf' or 'gene' column, de-duplicated
# and sorted so that the matrix indices are deterministic.
# pd.concat is used because Series.append was removed in pandas 2.0.
nodes = sorted(pd.concat([df['tf'], df['gene']]).unique().tolist())

# Map each node name to its row/column index in the adjacency matrix.
n = len(nodes)
nodes_dict = {name: index for index, name in enumerate(nodes)}

# Start from an all-zero n-by-n matrix. np.empty would leave the entries
# uninitialized, so cells that are never assigned below could hold
# arbitrary values instead of 0.
Adj_mat = np.zeros(shape=(n, n), dtype=int)

# Set Adj_mat[tf, gene] = 1 for every row whose effect is '+' or '-'.
# Links from a node to itself are recorded as well; this block does not
# clear the diagonal.
has_effect = df['effect'].isin(['+', '-'])
rows = df.loc[has_effect, 'tf'].map(nodes_dict).to_numpy(dtype=int)
cols = df.loc[has_effect, 'gene'].map(nodes_dict).to_numpy(dtype=int)
Adj_mat[rows, cols] = 1
```
%% Cell type:code id: tags:
```
python
# Adj_mat_0d is bound to the very same array object as Adj_mat (plain
# assignment, no copy), so zeroing the diagonal here also zeroes the
# diagonal of Adj_mat.
Adj_mat_0d = Adj_mat
np.fill_diagonal(Adj_mat_0d, 0)

# Re-define the adjacency matrix as a list-of-lists sparse matrix.
# This helps to decrease memory usage and the number of computations in
# further operations.
Adj_mat_0d = lil_matrix(Adj_mat_0d)
```
%% Cell type:code id: tags:
```
python
# define functions to count feed-forward and feed-back loops
def count_feed_forward_loops(Adj_mat_0d):
    """Count feed-forward loops i -> j, j -> k, i -> k in a directed network.

    The count is the trace of ``A @ A @ A.T``: entry (i, i) of that product
    sums ``A[i, j] * A[j, k] * A[i, k]`` over all j and k.

    Parameters
    ----------
    Adj_mat_0d : square adjacency matrix (NumPy array or SciPy sparse matrix).
        Expected to hold 0/1 entries with a zero diagonal; otherwise walks
        through self-links are counted too.

    Returns
    -------
    The sum of the diagonal of ``A @ A @ A.T``.
    """
    # '@' is always a matrix product. '*' is a matrix product only for the
    # scipy.sparse *matrix* classes and is element-wise for NumPy arrays,
    # which would silently give a wrong count for dense input.
    two_step_paths = Adj_mat_0d @ Adj_mat_0d
    ffl = (two_step_paths @ Adj_mat_0d.T).diagonal().sum()
    return ffl
def count_feed_back_loops(Adj_mat_0d):
    """Count directed 3-cycles i -> j -> k -> i in a directed network.

    The trace of ``A @ A @ A`` counts closed walks of length three. Each
    3-cycle is counted once per starting node, hence the division by 3.

    Parameters
    ----------
    Adj_mat_0d : square adjacency matrix (NumPy array or SciPy sparse matrix).
        Expected to hold 0/1 entries with a zero diagonal; otherwise closed
        walks through self-links are counted too.

    Returns
    -------
    The trace of ``A @ A @ A`` divided by 3 (a float, because of the true
    division).
    """
    # '@' is always a matrix product. '*' is a matrix product only for the
    # scipy.sparse *matrix* classes and is element-wise for NumPy arrays,
    # which would silently give a wrong count for dense input.
    three_step_walks = Adj_mat_0d @ Adj_mat_0d @ Adj_mat_0d
    fbl = three_step_walks.diagonal().sum() / 3
    return fbl
```
%% Cell type:code id: tags:
```
python
# Count the feed-forward and feed-back loops of the observed network
# and report both numbers.
f_f_l_data, f_b_l_data = (
    count_feed_forward_loops(Adj_mat_0d),
    count_feed_back_loops(Adj_mat_0d),
)

print(f'In data: feed_forward_loops = {f_f_l_data}')
print(f'In data: feed_back_loops = {f_b_l_data}')
```
%%%% Output: stream
In data: feed_forward_loops = 262
In data: feed_back_loops = 0.0
%% Cell type:code id: tags:
```
python
# Seed NumPy's global random generator with a fixed value so that the
# randomized networks drawn from it are reproducible between runs.
np.random.seed(seed=305)
```
%% Cell type:code id: tags:
```
python
# Loop counts collected from 1000 randomized networks.
f_f_l_random = []
f_b_l_random = []

for _ in range(1000):
    # Randomize by shuffling the columns of the dense adjacency matrix.
    column_order = np.random.permutation(n)
    shuffled = lil_matrix(Adj_mat[:, column_order])

    # Shuffling can move links onto the diagonal; remove those self-links
    # before counting.
    shuffled.setdiag(0)

    f_f_l_random.append(count_feed_forward_loops(shuffled))
    f_b_l_random.append(count_feed_back_loops(shuffled))
```
%% Cell type:code id: tags:
```
python
# Report the average loop counts over the randomized networks.
mean_ffl_random = np.mean(f_f_l_random)
print(f'Mean of feed-forward loops in random networks: {mean_ffl_random}')

mean_fbl_random = np.mean(f_b_l_random)
print(f'Mean of feed-back loops in random networks: {mean_fbl_random}')
```
%%%% Output: stream
Mean of feed-forward loops in random networks: 99.59
Mean of feed-back loops in random networks: 0.963
%% Cell type:code id: tags:
```
python
def enrichment_score(data, random):
    """Return how many standard deviations `data` lies from the mean of `random`.

    Parameters
    ----------
    data : observed value.
    random : sequence of values obtained from the randomized networks.

    Returns
    -------
    ``(data - mean(random)) / std(random)``, using NumPy's default
    (population, ddof=0) standard deviation.
    """
    baseline = np.mean(random)
    spread = np.std(random)
    return (data - baseline) / spread
# Enrichment of each loop type in the data relative to the randomized networks.
ffl_enrichment_score = enrichment_score(f_f_l_data, f_f_l_random)
fbl_enrichment_score = enrichment_score(f_b_l_data, f_b_l_random)

# Scores are reported rounded to four decimal places.
print(
    f'Enrichment score on feed-forward loops: {round(ffl_enrichment_score, 4)}'
)
print(
    f'Enrichment score on feed-back loops: {round(fbl_enrichment_score, 4)}'
)
```
%%%% Output: stream
Enrichment score on feed-forward loops: 5.7703
Enrichment score on feed-back loops: -0.9117
%% Cell type:code id: tags:
```
python
from scipy.stats import ttest_ind

print('t-test result on equal means for feed-forward loops:')

# The single observed count is repeated 1000 times to form the first sample.
# NOTE(review): that sample has zero variance, so the p-value mostly reflects
# the number of repetitions; consider a one-sample test or an empirical
# p-value instead - confirm with the analysis owner.
observed_sample = [f_f_l_data] * 1000
ttest_ind(observed_sample, f_f_l_random, equal_var=False)
```
%%%% Output: stream
t-test result on equal means for feed-forward loops:
%%%% Output: execute_result
Ttest_indResult(statistic=182.38291356269937, pvalue=0.0)
%% Cell type:markdown id: tags:
In the given data, the number of feed-forward loops is significantly larger than in random networks with similar characteristics.
%% Cell type:code id: tags:
```
python
print('t-test result on equal means for feed-back loops:')

# The single observed count is repeated 1000 times to form the first sample.
# NOTE(review): that sample has zero variance, so the p-value mostly reflects
# the number of repetitions; consider a one-sample test or an empirical
# p-value instead - confirm with the analysis owner.
observed_sample = [f_b_l_data] * 1000
ttest_ind(observed_sample, f_b_l_random, equal_var=False)
```
%%%% Output: stream
t-test result on equal means for feed-back loops:
%%%% Output: execute_result
Ttest_indResult(statistic=-28.81699853911231, pvalue=2.140491331685297e-133)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment