-
Notifications
You must be signed in to change notification settings - Fork 0
/
support_functions.py
118 lines (91 loc) · 3.8 KB
/
support_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import numpy as np
import random as rd
from scipy.stats import zscore
def distance_matrix(data, metr):
    """Build the symmetric pairwise distance matrix between the rows of ``data``.

    Parameters
    ----------
    data : object exposing ``.shape`` and ``.values``
        Two-dimensional table whose rows are compared pairwise.  The
        ``.values`` access suggests a pandas DataFrame -- confirm against
        callers.  Use ``distance_matrix_np`` for plain NumPy arrays.
    metr : callable
        ``metr(row_a, row_b)`` returning the distance between two rows.

    Returns
    -------
    numpy.ndarray
        ``(m, m)`` float array with ``result[i, j] == metr(row_i, row_j)``.
        ``metr`` is evaluated only for ``i <= j`` (diagonal included) and the
        value is mirrored into ``[j, i]``, so ``metr`` is assumed symmetric.
    """
    # Fetch the underlying array once.  ``data.values`` is loop-invariant and
    # was previously re-evaluated twice for every (i, j) pair.
    rows = data.values
    m = data.shape[0]
    dist_matrix = np.zeros((m, m), dtype=float)
    for i in range(m):
        row_i = rows[i, :]
        for j in range(i, m):
            # Fill both triangles from a single metric evaluation.
            dist_matrix[i, j] = dist_matrix[j, i] = metr(row_i, rows[j, :])
    return dist_matrix
def distance_matrix_np(data, metr):
    """Build the symmetric pairwise distance matrix between the rows of an array.

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional array (its ``shape`` must unpack into exactly two
        values); each row is one observation.
    metr : callable
        ``metr(row_a, row_b)`` returning the distance between two rows.

    Returns
    -------
    numpy.ndarray
        ``(m, m)`` float array.  ``metr`` is called once per pair with
        ``i <= j`` (diagonal included) and the value is copied into ``[j, i]``.
    """
    n_rows, _ = data.shape
    result = np.zeros((n_rows, n_rows), dtype=float)
    # triu_indices walks the upper triangle (diagonal included) in row-major
    # order, i.e. the same (i, j) sequence as two nested loops with j >= i.
    upper_i, upper_j = np.triu_indices(n_rows)
    for a, b in zip(upper_i, upper_j):
        result[a, b] = metr(data[a, :], data[b, :])
        result[b, a] = result[a, b]
    return result
def normalise_data(data):
    """Scale every row of ``data`` so that its entries sum to 1.

    Parameters
    ----------
    data : array-like
        Two-dimensional numeric data.  It is NOT modified: the previous
        implementation aliased the input (``data_normed = data``) and wrote
        the result back into it, which overwrote the caller's array and
        truncated the quotients when the array had an integer dtype.

    Returns
    -------
    numpy.ndarray
        New float array of the same shape where each entry has been divided
        by the sum of its row.  Rows whose sum is 0 follow NumPy division
        semantics (``nan``/``inf``), as before.
    """
    # Work on a float copy so the caller's data stays untouched and integer
    # inputs are divided in floating point.
    values = np.array(data, dtype=float)
    row_sums = values.sum(axis=1, keepdims=True)
    return values / row_sums
def percentage_normalise_z(data, p1, p2):
    """Z-score, max-scale and threshold the columns of ``data``, then filter them.

    Processing applied to every column:

    1. z-score it (``scipy.stats.zscore`` with its default axis);
    2. divide it by its maximum;
    3. set every entry smaller than ``p1`` to 0;
    4. keep the column only if the fraction of entries that were smaller
       than ``p1`` is strictly greater than ``p2``.

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional numeric array.  Per the original comments columns
        are genes and rows are samples -- confirm against callers.
    p1 : float
        Threshold below which a (scaled) value is considered unimportant.
    p2 : float
        Threshold on the fraction of below-``p1`` entries of a column.
        NOTE(review): the original comment describes this as a threshold on
        how well a gene is represented across samples, yet the code keeps
        columns whose share of *below*-``p1`` entries exceeds ``p2`` --
        confirm this direction is intended.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_kept, n_rows)``: one ROW per kept column,
        in original column order.  When no column qualifies the shape is
        ``(0, n_rows)`` (previously a 1-D empty array).
    """
    scaled = zscore(data)
    n_rows = scaled.shape[0]
    # Column-wise scaling by the maximum, done for all columns at once.
    scaled = scaled / scaled.max(axis=0)
    below = scaled < p1
    scaled[below] = 0
    keep = below.sum(axis=0) / float(n_rows) > p2
    # Transpose so that each kept column becomes one output row.
    return np.array(scaled.T[keep], dtype=float)
def percentage_binary_normalise_z(data, p1, p2):
    """Z-score and max-scale the columns of ``data``, binarise them, then filter.

    Processing applied to every column:

    1. z-score it (``scipy.stats.zscore`` with its default axis);
    2. divide it by its maximum;
    3. replace it with an indicator that is 1.0 where the scaled value is
       smaller than ``p1`` and 0.0 elsewhere;
    4. keep the column only if the fraction of 1.0 entries is strictly
       greater than ``p2``.

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional numeric array.  Per the original comments columns
        are genes and rows are samples -- confirm against callers.
    p1 : float
        Threshold below which a (scaled) value is considered unimportant.
    p2 : float
        Threshold on the fraction of below-``p1`` entries of a column.
        NOTE(review): the original comment describes this as a threshold on
        how well a gene is represented across samples, yet the code keeps
        columns whose share of *below*-``p1`` entries exceeds ``p2`` --
        confirm this direction is intended.

    Returns
    -------
    numpy.ndarray
        Float 0/1 array of shape ``(n_kept, n_rows)``: one ROW per kept
        column, in original column order.  When no column qualifies the
        shape is ``(0, n_rows)`` (previously a 1-D empty array).
    """
    scaled = zscore(data)
    n_rows = scaled.shape[0]
    # Indicator of "below threshold" for every entry, computed in one pass.
    below = (scaled / scaled.max(axis=0)) < p1
    keep = below.sum(axis=0) / float(n_rows) > p2
    # Transpose so that each kept column becomes one output row.
    return below.T[keep].astype(float)
def percentage_normalise(data, p1, p2):
    """Row-normalise, max-scale and threshold the columns of ``data``, then filter.

    Processing:

    1. row-normalise the data with ``normalise_data``;
    2. divide every column by its maximum;
    3. set every entry smaller than ``p1`` to 0;
    4. keep a column only if the fraction of its entries that were smaller
       than ``p1`` is strictly greater than ``p2``.

    Parameters
    ----------
    data : array-like
        Two-dimensional numeric data.  Per the original comments columns are
        genes and rows are samples -- confirm against callers.  The input is
        not modified; the previous implementation overwrote it in place.
    p1 : float
        Threshold below which a (scaled) value is considered unimportant.
    p2 : float
        Threshold on the fraction of below-``p1`` entries of a column.
        NOTE(review): the original comment describes this as a threshold on
        how well a gene is represented across samples, yet the code keeps
        columns whose share of *below*-``p1`` entries exceeds ``p2`` --
        confirm this direction is intended.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_kept, n_rows)``: one ROW per kept column,
        in original column order.  When no column qualifies the shape is
        ``(0, n_rows)`` (previously a 1-D empty array).
    """
    # Hand normalise_data a private float copy so the caller's array is never
    # written to, whatever normalise_data does with its argument.
    normed = normalise_data(np.array(data, dtype=float))
    n_rows = normed.shape[0]
    # Column-wise scaling by the maximum, done for all columns at once.
    scaled = normed / normed.max(axis=0)
    below = scaled < p1
    scaled[below] = 0
    keep = below.sum(axis=0) / float(n_rows) > p2
    # Transpose so that each kept column becomes one output row.
    return np.array(scaled.T[keep], dtype=float)
def percentage_binary_normalise(data, p1, p2):
    """Row-normalise and max-scale the columns of ``data``, binarise them, then filter.

    Processing:

    1. row-normalise the data with ``normalise_data``;
    2. divide every column by its maximum;
    3. replace every column with an indicator that is 1.0 where the scaled
       value is smaller than ``p1`` and 0.0 elsewhere;
    4. keep a column only if the fraction of 1.0 entries is strictly
       greater than ``p2``.

    Parameters
    ----------
    data : array-like
        Two-dimensional numeric data.  Per the original comments columns are
        genes and rows are samples -- confirm against callers.  The input is
        not modified; the previous implementation overwrote it in place.
    p1 : float
        Threshold below which a (scaled) value is considered unimportant.
    p2 : float
        Threshold on the fraction of below-``p1`` entries of a column.
        NOTE(review): the original comment describes this as a threshold on
        how well a gene is represented across samples, yet the code keeps
        columns whose share of *below*-``p1`` entries exceeds ``p2`` --
        confirm this direction is intended.

    Returns
    -------
    numpy.ndarray
        Float 0/1 array of shape ``(n_kept, n_rows)``: one ROW per kept
        column, in original column order.  When no column qualifies the
        shape is ``(0, n_rows)`` (previously a 1-D empty array).
    """
    # Hand normalise_data a private float copy so the caller's array is never
    # written to, whatever normalise_data does with its argument.
    normed = normalise_data(np.array(data, dtype=float))
    n_rows = normed.shape[0]
    # Indicator of "below threshold" for every entry, computed in one pass.
    below = (normed / normed.max(axis=0)) < p1
    keep = below.sum(axis=0) / float(n_rows) > p2
    # Transpose so that each kept column becomes one output row.
    return below.T[keep].astype(float)
def rgb_color_random():
    """Return a random colour as a CSS-style ``rgb(R,G,B)`` string.

    Each channel is drawn independently with ``random.randint(0, 255)``
    (both bounds inclusive), in the order red, green, blue.

    Returns
    -------
    str
        For example ``'rgb(49,130,189)'`` (no spaces after the commas).
    """
    # Draw in R, G, B order so seeded sequences match the previous behaviour.
    red = rd.randint(0, 255)
    green = rd.randint(0, 255)
    blue = rd.randint(0, 255)
    return 'rgb({},{},{})'.format(red, green, blue)
# follows the dimensionality reduction :)