#C++, 979 969 898 859 859848 bytes
#include<cstdio>
#include<cstdlib>
#define K 400
#define L 400
#define M (i*)malloc(sizeof(i))
#define a(C,X,Y)if(C&&b[Y][X].c){t->n=M;t=t->n;b[Y][X].d=d+1;t->n=0;t->c=X;t->d=Y;}
#define A(n,d)case n:d;break;
#define F fgetc(f)
#define W(A,B) for(A=0;A<B;A++){
struct i{int c;int d;int v;i*n;}b[L][K]={0},*h,*t;float m=0;int main(){FILE*f=fopen("d","r+b");int x,y,d=0;W(y,L)W(x,K)b[y][x].c=F<<16|F<<8|F;}}rewind(f);x=165,y=155;h=M;h->c=x;h->d=y;b[y][x].d=d;t=h;while(h){i*p=&b[hi*p=b[h->d][h>d]+h->c];if>c;if(p->v){h=h->n;continue;}>n;else{p->v=1;x=h->c;y=h->d;d=p->d;m=d>m?d:m;a(x>0,x-1,y)a(x<K-1,x+1,y)a(y>0,x,y-1)a(y<L-1,x,y+1)}}W(y,L)W(x,K)i p=b[y][x];unsigned char n=-1,h=p.d/(m/n),R=h%43*6,Q=n*(n-(n*R>>8))>>8,t=n*(n-(n*(n-R)>>8))>>8,r=0r,g=0g,b=0;switchb;switch(h/43){A(0,r=n;g=tn,t,0)A(1,r=Q;g=nQ,n,0)A(2,g=n;b=t0,n,t)A(3,g=Q;b=n0,Q,n)A(4,r=t;b=nt,0,n)A(5,r=n;b=Qn,0,Q)}d=h?r|g<<8|b<<16:p.c?-1:0;fwrite(&d,1,3,f);}}}
Many concepts remain similar, but there are certainly a myriad of tiny changes. To compile that as C you do need to use C11 (C99 will probably work but I only strictly tested in C11).
I quite enjoyed this challenge, thanks for giving me the idea to try something new :).
Edit: Golf'd a bit better.
Edit2: Merged two structs so my pixel struct and queue are the same, bit more macro abuse, and reflowed uses of 255 such that it can be defined as -1 when defining a series of unsigned chars, and lastly removed a function call.
Edit3: Reused a few more variables, operator precedence tweaks, and output converted to RGB saving the alpha channel
Edit4: I think I am done with this now, some pointer arithmetic changes and slight control flow tweaks.