Commit b5dbabfb authored by jeq008-uib's avatar jeq008-uib
Browse files

assignment for week 36

parent ee995b5a
This diff is collapsed.
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
from scipy.sparse import lil_matrix
```
%% Cell type:code id: tags:
``` python
# Load the TF -> gene interaction table. Only columns 0, 1, 2 and 4 are kept,
# and the first 34 lines of the file are skipped (presumably a comment
# header -- confirm against the data file).
df = pd.read_table(
    'network_tf_gene.txt',
    header=None,
    skiprows=34,
    usecols=[0, 1, 2, 4],
)

# Normalise every kept column to lower-case strings so the comparisons
# below do not depend on the capitalisation used in the file.
for col in [0, 1, 2, 4]:
    lowered = df[col].astype(str).str.lower()
    df[col] = lowered

# Give the positional columns readable names.
column_names = {0: 'tf', 1: 'gene', 2: 'effect', 4: 'power'}
df = df.rename(columns=column_names)

# Keep only interactions with a definite sign ('+' or '-') whose evidence
# level is 'strong'.
has_sign = df['effect'].isin(['+', '-'])
is_strong = df['power'] == 'strong'
filtered = has_sign & is_strong

# Keep the first row of every (tf, gene) pair so each link appears once.
df = df[filtered].drop_duplicates(subset=['tf', 'gene'])
```
%% Cell type:code id: tags:
``` python
# Build the dense 0/1 adjacency matrix of the regulatory network:
# Adj_mat[r, c] == 1 means that the TF with index r regulates the gene with
# index c.

# create a sorted list of nodes in the network (all TFs and all target genes);
# pd.concat is used because Series.append was removed in pandas 2.0
nodes = sorted(pd.concat([df['tf'], df['gene']]).unique().tolist())
# create a dictionary with nodes names and their indexes
n = len(nodes)
nodes_dict = dict(zip(nodes, np.arange(n)))
# create an all-zero matrix Adj_mat of size n-by-n.
# np.zeros (not np.empty) is required: np.empty returns uninitialised memory,
# so every cell that is not explicitly set below would hold an arbitrary value.
Adj_mat = np.zeros(shape=(n, n), dtype=int)
# fill in Adj_mat with 1 according to the data
for i in df.index:
    r = nodes_dict[df.loc[i, 'tf']]
    c = nodes_dict[df.loc[i, 'gene']]
    # only signed interactions count as links; self-regulation (r == c) is
    # still recorded here and is not removed by this cell
    if df.loc[i, 'effect'] in ['+', '-']:
        Adj_mat[r, c] = 1
```
%% Cell type:code id: tags:
``` python
# Remove self-links: set the diagonal of the adjacency matrix to zero.
# NOTE: Adj_mat_0d is bound to the same array object as Adj_mat at this
# point (no copy is made), so the diagonal of Adj_mat is zeroed as well.
Adj_mat_0d = Adj_mat
np.fill_diagonal(Adj_mat_0d, 0)
# Re-define the adjacency matrix in sparse list-of-lists form; storing only
# the links helps to decrease memory usage and the number of computations
# in further operations.
Adj_mat_0d = lil_matrix(Adj_mat_0d)
```
%% Cell type:code id: tags:
``` python
# define functions to count feed-forward and feed-back loops
def count_feed_forward_loops(Adj_mat_0d):
    """Count feed-forward loops in a directed network.

    A feed-forward loop is a triple of nodes (a, b, c) with links
    a -> b, b -> c and the shortcut a -> c.  With A the adjacency matrix
    (A[i, j] != 0 taken to mean a link i -> j), entry [a, a] of A @ A @ A.T
    sums A[a, b] * A[b, c] * A[a, c] over all b and c, so the trace of that
    product is the number of such triples.

    Parameters
    ----------
    Adj_mat_0d : square 0/1 adjacency matrix (scipy sparse matrix or dense
        numpy array).  The diagonal is expected to be zero; self-links
        would otherwise be counted as degenerate loops.

    Returns
    -------
    The number of feed-forward loops.
    """
    # Use the explicit matrix-product operator: `*` is a matrix product only
    # for scipy.sparse matrix classes and is element-wise for dense arrays,
    # which would silently give a wrong count.
    two_step_paths = Adj_mat_0d @ Adj_mat_0d
    ffl = (two_step_paths @ Adj_mat_0d.T).diagonal().sum()
    return ffl
def count_feed_back_loops(Adj_mat_0d):
    """Count three-node feed-back loops (directed 3-cycles) in a network.

    With A the adjacency matrix (A[i, j] != 0 taken to mean a link i -> j),
    entry [a, a] of A @ A @ A counts the closed walks a -> b -> c -> a.
    Every 3-cycle is found once from each of its three nodes, hence the
    trace is divided by 3.

    Parameters
    ----------
    Adj_mat_0d : square 0/1 adjacency matrix (scipy sparse matrix or dense
        numpy array).  The diagonal is expected to be zero; self-links
        would otherwise contribute closed walks that are not 3-cycles.

    Returns
    -------
    The number of feed-back loops as a float (result of a true division).
    """
    # Use the explicit matrix-product operator: `*` is a matrix product only
    # for scipy.sparse matrix classes and is element-wise for dense arrays,
    # which would silently give a wrong count.
    closed_walks = (Adj_mat_0d @ Adj_mat_0d @ Adj_mat_0d).diagonal().sum()
    fbl = closed_walks / 3
    return fbl
```
%% Cell type:code id: tags:
``` python
# compute the number of feed-forward and feed-back loops in the observed
# network (the sparse adjacency matrix with a zeroed diagonal)
f_f_l_data = count_feed_forward_loops(Adj_mat_0d)
f_b_l_data = count_feed_back_loops(Adj_mat_0d)
# report both counts; the feed-back count is a float because the counting
# function divides by 3
print(f'In data: feed_forward_loops = {f_f_l_data}')
print(f'In data: feed_back_loops = {f_b_l_data}')
```
%%%% Output: stream
In data: feed_forward_loops = 262
In data: feed_back_loops = 0.0
%% Cell type:code id: tags:
``` python
# Preparation for the randomization experiment: 1000 randomized networks are
# generated in the next cell and their feed-forward and feed-back loops are
# counted.
# Fix the seed of NumPy's global random generator so that the column
# permutations drawn below are reproducible from run to run.
np.random.seed(305)
```
%% Cell type:code id: tags:
``` python
# Null model: build 1000 randomized networks by shuffling the columns
# (target side) of the dense adjacency matrix, and record the loop counts
# of each randomized network.
f_f_l_random, f_b_l_random = [], []
for counter in range(1, 1001):
    # one random column order per network, converted straight to sparse form
    Adj_mat_random = lil_matrix(Adj_mat[:, np.random.permutation(n)])
    # the shuffle can move links onto the diagonal; drop those self-links
    Adj_mat_random.setdiag(0)
    ffl = count_feed_forward_loops(Adj_mat_random)
    fbl = count_feed_back_loops(Adj_mat_random)
    f_f_l_random += [ffl]
    f_b_l_random += [fbl]
```
%% Cell type:code id: tags:
``` python
# Average loop counts over all randomized networks, for comparison with the
# counts observed in the data.
print(f'Mean of feed-forward loops in random networks: {np.mean(f_f_l_random)}')
print(f'Mean of feed-back loops in random networks: {np.mean(f_b_l_random)}')
```
%%%% Output: stream
Mean of feed-forward loops in random networks: 99.59
Mean of feed-back loops in random networks: 0.963
%% Cell type:code id: tags:
``` python
def enrichment_score(data, random):
    """Return how far `data` lies from the mean of `random`, in std units.

    Parameters
    ----------
    data : the observed value.
    random : the collection of values obtained from the randomized networks.

    Returns
    -------
    (data - mean(random)) / std(random), where the standard deviation is
    NumPy's default one (np.std called without extra arguments).
    """
    baseline = np.mean(random)
    spread = np.std(random)
    deviation = data - baseline
    return deviation / spread
# Enrichment of each loop type: the observed count compared with the counts
# from the randomized networks, expressed in standard deviations.
ffl_enrichment_score = enrichment_score(f_f_l_data, f_f_l_random)
fbl_enrichment_score = enrichment_score(f_b_l_data, f_b_l_random)
# report both scores rounded to 4 decimal places
print(f'Enrichment score on feed-forward loops: {round(ffl_enrichment_score,4)}')
print(f'Enrichment score on feed-back loops: {round(fbl_enrichment_score,4)}')
```
%%%% Output: stream
Enrichment score on feed-forward loops: 5.7703
Enrichment score on feed-back loops: -0.9117
%% Cell type:code id: tags:
``` python
# Two-sample t-test comparing the observed feed-forward count with the
# counts from the randomized networks.
print('t-test result on equal means for feed-forward loops:')
from scipy.stats import ttest_ind
# The first sample is the single observed count repeated 1000 times, so it
# has no spread of its own; equal_var=False requests the unequal-variance
# (Welch) form of the test. In effect this asks whether the mean of the
# random counts differs from the observed value.
# The bare call is the last expression of the cell, so its result is what
# the notebook displays.
ttest_ind([f_f_l_data]*1000,
f_f_l_random,
equal_var = False)
```
%%%% Output: stream
t-test result on equal means for feed-forward loops:
%%%% Output: execute_result
Ttest_indResult(statistic=182.38291356269937, pvalue=0.0)
%% Cell type:markdown id: tags:
In the given data, the number of feed-forward loops is significantly larger than in random networks with similar characteristics.
%% Cell type:code id: tags:
``` python
# Same test for feed-back loops; ttest_ind must already be in scope
# (imported from scipy.stats in an earlier cell).
print('t-test result on equal means for feed-back loops:')
# First sample: the observed feed-back count repeated 1000 times (zero
# spread); second sample: the counts from the randomized networks.
# equal_var=False requests the unequal-variance (Welch) form of the test.
ttest_ind([f_b_l_data]*1000,
f_b_l_random,
equal_var = False)
```
%%%% Output: stream
t-test result on equal means for feed-back loops:
%%%% Output: execute_result
Ttest_indResult(statistic=-28.81699853911231, pvalue=2.140491331685297e-133)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment