0
\$\begingroup\$

URL class.

Providing a way to look at the different parts of a URL:

http://localhost:53/status?name=ryan#234
^^^^                                      > Protocol
       ^^^^^^^^^^^^                       > server (and port)
                   ^^^^^^^                > Path
                          ^^^^^^^^^^      > Query
                                    ^^^^  > Hash (or Fragment)

Note: The definition of a URL requires that the Path and Query are separated by ?. But in the real world there are so many badly formed URLs that these two parts are often separated by & (particularly prevalent on sloppy PHP servers). Thus in the code I will split the path from the query on either ? or &.

I use a single std::string as the storage.
I calculate std::string_view into each of the different sections (so I don't need extra storage for these).

The second constructor URL::URL(std::string_view prot, std::string_view host, std::string_view request) is based on how a webserver finds the different parts in different sections of the request.

URL.h

#ifndef THORSANVIL_NISSE_NISSEHTTP_URL_H
#define THORSANVIL_NISSE_NISSEHTTP_URL_H

#include "NisseHTTPConfig.h"
#include <string>
#include <string_view>

namespace ThorsAnvil::Nisse::NisseHTTP
{

// URL: stores the full text of a URL in one std::string (hrefValue) and
// exposes each component as a std::string_view into that stored text, so
// the components need no storage of their own.
//
// The returned views refer to memory owned by this object.
class URL
{
    // NOTE: the declaration order below is significant. Members are
    // initialised in this order, and several find*() helpers read members
    // that must already be set (e.g. findHostname() reads hostRef and
    // portRef, findHost() reads protocolRef and originRef).
    std::string         hrefValue;      // Owned text; every view below points into it.
    std::string_view    protocolRef;
    std::string_view    originRef;
    std::string_view    hostRef;
    std::string_view    portRef;
    std::string_view    hostnameRef;
    std::string_view    pathRef;
    std::string_view    queryRef;
    std::string_view    hashRef;

    public:
        // Empty URL: empty text and empty views.
        URL() = default;
        // Parse a complete URL such as 'http://localhost:53/status?name=ryan#234'.
        URL(std::string_view href);
        // Assemble '<prot>://<host><request>' and parse the result.
        URL(std::string_view prot, std::string_view host, std::string_view request);

        URL(URL const& copy);
        URL(URL&& move) noexcept;
        // Takes its argument by value, so it serves as both copy and move assignment.
        URL& operator=(URL copyORmove) noexcept;

        // Two URLs are equal when their full text is equal.
        bool operator==(URL const& rhs)     const {return hrefValue == rhs.hrefValue;}
        bool operator!=(URL const& rhs)     const {return !(*this == rhs);}

        // Component accessors. None of them modifies the object, so they are
        // const and usable on a const URL.
        // Examples are for 'http://localhost:53/status?name=ryan#234'.
        std::string_view        href()      const {return hrefValue;}     // 'http://localhost:53/status?name=ryan#234'
        std::string_view        protocol()  const {return protocolRef;}   // 'http'      (text before "://", no trailing ':')
        std::string_view        origin()    const {return originRef;}     // 'http://localhost:53'
        std::string_view        host()      const {return hostRef;}       // 'localhost:53'
        std::string_view        hostname()  const {return hostnameRef;}   // 'localhost'
        std::string_view        port()      const {return portRef;}       // ':53'       (includes the leading ':')
        std::string_view        pathname()  const {return pathRef;}       // '/status'
        std::string_view        query()     const {return queryRef;}      // '?name=ryan' (includes the leading separator)
        std::string_view        hash()      const {return hashRef;}       // '#234'      (includes the leading '#')

        std::string_view        param(std::string_view param);  // param('name') => 'ryan'

        void swap(URL& other) noexcept;
        friend void swap(URL& lhs, URL& rhs) noexcept {lhs.swap(rhs);}
    private:
        // Concatenates the three pieces into '<prot>://<host><request>'.
        static std::string buildHref(std::string_view prot, std::string_view host, std::string_view request);
        // Each helper returns the view of one component of `src`. They are
        // members (not static) because most of them read views computed earlier.
        std::string_view findProtocol(std::string const& src);
        std::string_view findOrigin(std::string const& src);
        std::string_view findHost(std::string const& src);
        std::string_view findHostname(std::string const& src);
        std::string_view findPort(std::string const& src);
        std::string_view findPath(std::string const& src);
        std::string_view findQuery(std::string const& src);
        std::string_view findHash(std::string const& src);
};

}

#endif

URL.cpp

#include "URL.h"

#include <algorithm>

using namespace ThorsAnvil::Nisse::NisseHTTP;

// Parsing constructor.
// Copies `href` into hrefValue, then derives every component view from that
// private copy. The initialisers appear in the dependency order used by the
// find*() helpers: protocol, origin, host, port, hostname, then the rest.
URL::URL(std::string_view href)
    : hrefValue(href)
    , protocolRef(findProtocol(hrefValue))
    , originRef(findOrigin(hrefValue))
    , hostRef(findHost(hrefValue))
    , portRef(findPort(hrefValue))
    , hostnameRef(findHostname(hrefValue))
    , pathRef(findPath(hrefValue))
    , queryRef(findQuery(hrefValue))
    , hashRef(findHash(hrefValue))
{
}

// Three-part constructor.
// Joins the pieces with buildHref() into '<prot>://<host><request>', stores
// the result in hrefValue, and then derives every component view from the
// stored text in the same order as the single-argument constructor.
URL::URL(std::string_view prot, std::string_view host, std::string_view request)
    : hrefValue(buildHref(prot, host, request))
    , protocolRef(findProtocol(hrefValue))
    , originRef(findOrigin(hrefValue))
    , hostRef(findHost(hrefValue))
    , portRef(findPort(hrefValue))
    , hostnameRef(findHostname(hrefValue))
    , pathRef(findPath(hrefValue))
    , queryRef(findQuery(hrefValue))
    , hashRef(findHash(hrefValue))
{
}

// Copy constructor.
// The views of `copy` point into copy.hrefValue and must not be copied
// as-is. Delegating to the parsing constructor duplicates the text and
// derives a fresh set of views that point into this object's own storage.
URL::URL(URL const& copy)
    : URL(std::string_view(copy.hrefValue))
{
}

// Move constructor.
// Takes over the text of `move` and leaves `move` as an empty URL.
//
// The views must NOT simply be exchanged along with the string: swapping
// std::string objects may invalidate pointers into their character storage
// (implementations with a small-string buffer keep short text inside the
// string object itself), which would leave the exchanged views pointing into
// the other object. The views are therefore re-derived from the text once
// it lives in this object.
URL::URL(URL&& move) noexcept
    // All members default initialized to empty.
{
    // hrefValue starts empty, so after this exchange `move` holds empty text.
    hrefValue.swap(move.hrefValue);

    // Re-derive the views, in the dependency order the find*() helpers need.
    // The helpers only search and slice existing text; they do not allocate.
    protocolRef = findProtocol(hrefValue);
    originRef   = findOrigin(hrefValue);
    hostRef     = findHost(hrefValue);
    portRef     = findPort(hrefValue);
    hostnameRef = findHostname(hrefValue);
    pathRef     = findPath(hrefValue);
    queryRef    = findQuery(hrefValue);
    hashRef     = findHash(hrefValue);

    // The moved-from object must not keep views into text it no longer owns.
    move.protocolRef = std::string_view{};
    move.originRef   = std::string_view{};
    move.hostRef     = std::string_view{};
    move.portRef     = std::string_view{};
    move.hostnameRef = std::string_view{};
    move.pathRef     = std::string_view{};
    move.queryRef    = std::string_view{};
    move.hashRef     = std::string_view{};
}

// Unified copy/move assignment.
// The argument is taken by value, so the caller's expression has already
// been copied or moved into `copyORmove`. Exchanging state with it gives
// this object the new value; the old value is destroyed with the parameter.
URL& URL::operator=(URL copyORmove) noexcept
{
    URL& self = *this;
    self.swap(copyORmove);
    return self;
}

// Exchange the contents of two URL objects.
//
// Only the owned text is swapped; the component views of both objects are
// then re-derived from the text each object now holds. Exchanging the views
// themselves is not safe: swapping std::string objects may invalidate
// pointers into their character storage (with a small-string buffer the
// characters live inside the string object), so an exchanged view could end
// up pointing into the other object's storage.
void URL::swap(URL& other) noexcept
{
    hrefValue.swap(other.hrefValue);

    // Re-derive every view of `url` from its own text, in the dependency
    // order the find*() helpers require. The helpers only search and slice
    // existing text; they do not allocate.
    auto const reparse = [](URL& url)
    {
        url.protocolRef = url.findProtocol(url.hrefValue);
        url.originRef   = url.findOrigin(url.hrefValue);
        url.hostRef     = url.findHost(url.hrefValue);
        url.portRef     = url.findPort(url.hrefValue);
        url.hostnameRef = url.findHostname(url.hrefValue);
        url.pathRef     = url.findPath(url.hrefValue);
        url.queryRef    = url.findQuery(url.hrefValue);
        url.hashRef     = url.findHash(url.hrefValue);
    };

    reparse(*this);
    reparse(other);
}

// Build the full URL text '<prot>://<host><request>' from its three pieces.
// The pieces are concatenated verbatim; nothing is validated or escaped.
std::string URL::buildHref(std::string_view prot, std::string_view host, std::string_view request)
{
    std::string assembled(prot);
    assembled.append("://");
    assembled.append(host);
    assembled.append(request);
    return assembled;
}

// Protocol: everything in `src` before the first "://".
// When `src` contains no "://", the whole of `src` is returned.
std::string_view URL::findProtocol(std::string const& src)
{
    std::size_t const marker = src.find("://");
    std::size_t const length = (marker == std::string::npos) ? src.size() : marker;
    return std::string_view{src.data(), length};
}

// Origin: from the start of `src` up to (not including) the first of
// '/', '?', '&' or '#' found after the protocol and its "://".
// Reads protocolRef, which must already be set.
std::string_view URL::findOrigin(std::string const& src)
{
    // Skip the protocol plus the 3 characters of "://", clamped to the text length.
    std::size_t const afterProtocol = std::min(src.size(), protocolRef.size() + 3);
    std::size_t const delimiter     = src.find_first_of("/?&#", afterProtocol);
    std::size_t const length        = (delimiter == std::string::npos) ? src.size() : delimiter;
    return std::string_view{src.data(), length};
}

// Host (name plus optional port): the part of the origin that follows the
// protocol and its "://".
// Reads protocolRef and originRef, which must already be set.
std::string_view URL::findHost(std::string const& src)
{
    std::size_t const first = std::min(src.size(), protocolRef.size() + 3);
    std::size_t const last  = std::min(src.size(), originRef.size());
    return std::string_view{src.data() + first, last - first};
}

// Port: the trailing part of the host starting at the ':' that introduces
// the port (the ':' is included), or an empty view at the end of the host
// when there is no port.
// Reads hostRef, which must already be set; the parameter is unused.
//
// A bracketed IPv6 literal such as "[::1]:8080" contains ':' characters of
// its own, so the search for the port separator starts after the last ']'.
// Hosts without ']' are searched from the beginning, as before.
std::string_view URL::findPort(std::string const&)
{
    std::string_view result = hostRef;

    std::size_t const closeBracket = result.rfind(']');
    std::size_t const searchFrom   = (closeBracket == std::string_view::npos) ? 0 : closeBracket + 1;

    // No ':' found => drop the whole host, leaving an empty view at its end.
    std::size_t const separator = std::min(result.size(), result.find(':', searchFrom));
    result.remove_prefix(separator);
    return result;
}

// Hostname: the host with the port part cut off its end.
// Reads hostRef and portRef, which must already be set (portRef is a suffix
// of hostRef); the parameter is unused.
std::string_view URL::findHostname(std::string const&)
{
    std::size_t const nameLength = hostRef.size() - portRef.size();
    return hostRef.substr(0, nameLength);
}

// Path: starts at the first '/', '?', '&' or '#' after the protocol and
// runs up to (not including) the first '?', '&' or '#' at or after that
// point. It is empty when the first delimiter found is not a '/'.
// Reads protocolRef, which must already be set.
std::string_view URL::findPath(std::string const& src)
{
    std::size_t const npos          = std::string::npos;
    std::size_t const afterProtocol = std::min(src.size(), protocolRef.size() + 3);

    std::size_t first = src.find_first_of("/?&#", afterProtocol);
    if (first == npos) {
        first = src.size();
    }

    std::size_t last = src.find_first_of("?&#", first);
    if (last == npos) {
        last = src.size();
    }

    return std::string_view{src.data() + first, last - first};
}

// Query: starts at the first '?', '&' or '#' after the protocol (the
// separator is included) and runs up to (not including) the first '#' at or
// after that point. It is empty when the first delimiter found is a '#'.
// Reads protocolRef, which must already be set.
std::string_view URL::findQuery(std::string const& src)
{
    std::size_t const npos          = std::string::npos;
    std::size_t const afterProtocol = std::min(src.size(), protocolRef.size() + 3);

    std::size_t first = src.find_first_of("?&#", afterProtocol);
    if (first == npos) {
        first = src.size();
    }

    std::size_t last = src.find_first_of("#", first);
    if (last == npos) {
        last = src.size();
    }

    return std::string_view{src.data() + first, last - first};
}

// Hash (fragment): from the first '#' after the protocol (the '#' is
// included) to the end of `src`; empty when there is no '#'.
// Reads protocolRef, which must already be set.
std::string_view URL::findHash(std::string const& src)
{
    std::size_t const afterProtocol = std::min(src.size(), protocolRef.size() + 3);
    std::size_t const marker        = src.find_first_of("#", afterProtocol);
    std::size_t const first         = (marker == std::string::npos) ? src.size() : marker;
    return std::string_view{src.data() + first, src.size() - first};
}

std::string_view URL::param(std::string_view /*param*/)
{
    // TODO
    return "";
}
\$\endgroup\$
3
  • \$\begingroup\$ Are you intending to have a RFC 3986 compatible URI parser? As I see you have intent for handling "3.2 path" parameters, but are missing "3.2.1 User Information". Additionally "protocol" is "3.1 scheme" and "server (and port)" is "3.2 authority". \$\endgroup\$ Commented Oct 20, 2024 at 18:29
  • \$\begingroup\$ @Peilonrayz: Adding the user/password information to the URI I believe (could be wrong) is now considered out-dated (because of security) and this information is more properly passed in the header information or as part of the transport protocol (ssh). I actually had it in there as part of my initial code but removed it. \$\endgroup\$ Commented Oct 20, 2024 at 20:43
  • \$\begingroup\$ I'm pretty sure supplying the username and password 100% should always be avoided. However, ssh://git@github.com:Loki-Astari/ThorsSocket.git is GitHub URIs. \$\endgroup\$ Commented Oct 20, 2024 at 20:56

0

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.