I have collected large dataset of trajectories in 3D space. Below is the information about dataset-
- Each trajectory (shape: n x 4) is saved into CSV file with the following header:
time, p_x, p_y, p_z - The filenames are defined in following way: subject_
i_trail_i.csv, whereistarts from 1 to 10.
Below are first 10 lines from subject_1_trail_1.csv file-
time p_x p_y p_z
0 0.4333 0.1107 0.1259
0.0103 0.4336 0.1106 0.126
0.02 0.4334 0.1108 0.1259
0.03 0.4333 0.1106 0.1259
0.04 0.4334 0.1107 0.1259
0.0501 0.4328 0.1103 0.126
0.06 0.4331 0.1107 0.1255
0.0703 0.4331 0.1103 0.126
0.08 0.4324 0.1102 0.126
For each subject, I want to plot the trajectory showing median and variance as shown below-
fill_between(x, perc_25, perc_75)
I am using NumPy but the code is relatively long. Below is the code snippet-
from pylab import *
import sys
# make a 3d numpy array (chop off the extra rows)
def load(location, subject, suffix):
all_data = []
min_rows = sys.maxint
for i in range(1, 11):
file_name = location + subject + '_trail_' + str(i) + '.csv'
data = np.loadtxt(file_name, delimiter=',', skiprows=1)
if data.shape[0] < min_rows:
min_rows = data.shape[0]
all_data.append(data)
all_crop_data = []
for data in all_data:
all_crop_data.append(data[:min_rows,:])
return np.array(all_crop_data)
def proc(data):
t = data[:, :, 0] # first column is time
x = data[:, :, 1] # second column is x
y = data[:, :, 2] # third column is y
z = data[:, :, 3] # fourth column is z
t_median = np.zeros(data.shape[1])
t_perc_25 = np.zeros(data.shape[1])
t_perc_75 = np.zeros(data.shape[1])
x_median = np.zeros(data.shape[1])
x_perc_25 = np.zeros(data.shape[1])
x_perc_75 = np.zeros(data.shape[1])
y_median = np.zeros(data.shape[1])
y_perc_25 = np.zeros(data.shape[1])
y_perc_75 = np.zeros(data.shape[1])
z_median = np.zeros(data.shape[1])
z_perc_25 = np.zeros(data.shape[1])
z_perc_75 = np.zeros(data.shape[1])
for i in range(data.shape[1]): # for each timestamp
t_median[i] = np.median(t[:, i])
t_perc_25[i] = np.percentile(t[:, i], 25)
t_perc_75[i] = np.percentile(t[:, i], 75)
x_median[i] = np.median(x[:, i])
x_perc_25[i] = np.percentile(x[:, i], 25)
x_perc_75[i] = np.percentile(x[:, i], 75)
y_median[i] = np.median(y[:, i])
y_perc_25[i] = np.percentile(y[:, i], 25)
y_perc_75[i] = np.percentile(y[:, i], 75)
z_median[i] = np.median(z[:, i])
z_perc_25[i] = np.percentile(z[:, i], 25)
z_perc_75[i] = np.percentile(z[:, i], 75)
all_median = np.vstack((t_median, x_median, y_median, z_median)).T
all_perc_25 = np.vstack((t_perc_25, x_perc_25, y_perc_25, z_perc_25)).T
all_perc_75 = np.vstack((t_perc_75, x_perc_75, y_perc_75, z_perc_75)).T
return all_median, all_perc_25, all_perc_75
s1 = load('/Desktop/', 'subject_1') # subject 1
s2 = load('/Desktop/', 'subject_2') # subject 2
x = np.arange(0, s1.shape[1])
s1_med, s1_perc_25, s1_perc_75 = proc(s1)
s2_med, s2_perc_25, s2_perc_75 = proc(s2)
# lets plot only x (second column)
index = 1
s1_med = s1_med[:, index]
s1_perc_25 = s1_perc_25[:, index]
s1_perc_75 = s1_perc_75[:, index]
s2_med = s2_med[:, index]
s2_perc_25 = s2_perc_25[:, index]
s2_perc_75 = s2_perc_75[:, index]
fill_between(x, s1_perc_25, s1_perc_75, alpha=0.25, linewidth=0, color='#B22400')
fill_between(x, s2_perc_25, s2_perc_75, alpha=0.25, linewidth=0, color='#006BB2')
plot(x, s1_med, linewidth=2, color='#B22400')
plot(x, s2_med, linewidth=2, color='#006BB2')
I am looking for a better, i.e., pythonic way to achieve the same. I am not sure if Pandas can be useful here. Looking for your suggestions, please.