Return to Answer

Post Timeline

Add note about specific function to use

Source Link

edited Jan 18, 2024 at 5:19

The attributedBody field is stored in an Apple-proprietary typedstream format, which I've found is actually quite parseable via the pytypedstream package for Python (use the typedstream.stream.TypedStreamReader class).

Assuming you are using pandas, here is a full example to retrieve all messages across all conversations. I leave it to you to filter the data how you please.

import os
import os.path
import sqlite3

import pandas as pd
from typedstream.stream import TypedStreamReader


# The textual contents of some messages are encoded in a special attributedBody
# column on the message row; this attributedBody value is in Apple's proprietary
# typedstream format, but can be parsed with the pytypedstream package
# (<https://pypi.org/project/pytypedstream/>)
def decode_message_attributedbody(data):
    """Extract the plain-text message from an ``attributedBody`` blob.

    Args:
        data: Raw typedstream bytes from the ``attributedBody`` column, or a
            falsy value (``None`` / empty bytes) when the row has no blob.

    Returns:
        The first bytes event found in the stream, decoded as UTF-8, or
        ``None`` when ``data`` is falsy or the stream yields no bytes event.
    """
    if not data:
        return None
    for event in TypedStreamReader.from_data(data):
        # The first bytes object is the one we want
        if isinstance(event, bytes):
            return event.decode("utf-8")
    # No bytes event in the stream: make the "nothing to decode" result explicit
    return None


def main():
    """Print the text of every message in the local Messages database.

    Reads the ``text`` and ``attributedBody`` columns of the ``message`` table
    in ``~/Library/Messages/chat.db`` (ordered by ``date`` descending), fills
    missing ``text`` values with the decoded ``attributedBody`` contents, and
    prints the resulting ``text`` column.
    """
    # Local import so this function is self-contained
    from contextlib import closing

    db_path = os.path.expanduser("~/Library/Messages/chat.db")
    # sqlite3's own context manager only commits/rolls back the transaction;
    # closing() is what actually releases the connection when we are done.
    with closing(sqlite3.connect(db_path)) as connection:
        messages_df = pd.read_sql_query(
            sql="SELECT text, attributedBody FROM message ORDER BY date DESC",
            con=connection,
            # NOTE(review): the query selects no "datetime" column, so this
            # entry appears to have no effect -- confirm before relying on it.
            parse_dates={"datetime": "ISO8601"},
        )
    # The frame is fully loaded at this point, so the remaining work does not
    # need the database connection.
    # Decode any attributedBody values and merge them into the 'text' column
    messages_df["text"] = messages_df["text"].fillna(
        messages_df["attributedBody"].apply(decode_message_attributedbody)
    )
    print(messages_df["text"])


# Call main() only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()

The attributedBody field is encoded in an Apple-proprietary typedstream format, which I've found is easily parseable via the pytypedstream package for Python. Assuming you are using pandas, here is a full example to retrieve all messages across all conversations. I leave it to you to filter the data how you please.

import os
import os.path
import sqlite3

import pandas as pd
from typedstream.stream import TypedStreamReader


# The textual contents of some messages are encoded in a special attributedBody
# column on the message row; this attributedBody value is in Apple's proprietary
# typedstream format, but can be parsed with the pytypedstream package
# (<https://pypi.org/project/pytypedstream/>)
def decode_message_attributedbody(data):
    """Extract the plain-text message from an ``attributedBody`` blob.

    Args:
        data: Raw typedstream bytes from the ``attributedBody`` column, or a
            falsy value (``None`` / empty bytes) when the row has no blob.

    Returns:
        The first bytes event found in the stream, decoded as UTF-8, or
        ``None`` when ``data`` is falsy or the stream yields no bytes event.
    """
    if not data:
        return None
    for event in TypedStreamReader.from_data(data):
        # The first bytes object is the one we want
        if isinstance(event, bytes):
            return event.decode("utf-8")
    # No bytes event in the stream: make the "nothing to decode" result explicit
    return None


def main():
    """Print the text of every message in the local Messages database.

    Reads the ``text`` and ``attributedBody`` columns of the ``message`` table
    in ``~/Library/Messages/chat.db`` (ordered by ``date`` descending), fills
    missing ``text`` values with the decoded ``attributedBody`` contents, and
    prints the resulting ``text`` column.
    """
    # Local import so this function is self-contained
    from contextlib import closing

    db_path = os.path.expanduser("~/Library/Messages/chat.db")
    # sqlite3's own context manager only commits/rolls back the transaction;
    # closing() is what actually releases the connection when we are done.
    with closing(sqlite3.connect(db_path)) as connection:
        messages_df = pd.read_sql_query(
            sql="SELECT text, attributedBody FROM message ORDER BY date DESC",
            con=connection,
            # NOTE(review): the query selects no "datetime" column, so this
            # entry appears to have no effect -- confirm before relying on it.
            parse_dates={"datetime": "ISO8601"},
        )
    # The frame is fully loaded at this point, so the remaining work does not
    # need the database connection.
    # Decode any attributedBody values and merge them into the 'text' column
    messages_df["text"] = messages_df["text"].fillna(
        messages_df["attributedBody"].apply(decode_message_attributedbody)
    )
    print(messages_df["text"])


# Call main() only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()

The attributedBody field is stored in an Apple-proprietary typedstream format, which I've found is actually quite parseable via the pytypedstream package for Python (use the typedstream.stream.TypedStreamReader class).

Assuming you are using pandas, here is a full example to retrieve all messages across all conversations. I leave it to you to filter the data how you please.

import os
import os.path
import sqlite3

import pandas as pd
from typedstream.stream import TypedStreamReader


# The textual contents of some messages are encoded in a special attributedBody
# column on the message row; this attributedBody value is in Apple's proprietary
# typedstream format, but can be parsed with the pytypedstream package
# (<https://pypi.org/project/pytypedstream/>)
def decode_message_attributedbody(data):
    """Extract the plain-text message from an ``attributedBody`` blob.

    Args:
        data: Raw typedstream bytes from the ``attributedBody`` column, or a
            falsy value (``None`` / empty bytes) when the row has no blob.

    Returns:
        The first bytes event found in the stream, decoded as UTF-8, or
        ``None`` when ``data`` is falsy or the stream yields no bytes event.
    """
    if not data:
        return None
    for event in TypedStreamReader.from_data(data):
        # The first bytes object is the one we want
        if isinstance(event, bytes):
            return event.decode("utf-8")
    # No bytes event in the stream: make the "nothing to decode" result explicit
    return None


def main():
    """Print the text of every message in the local Messages database.

    Reads the ``text`` and ``attributedBody`` columns of the ``message`` table
    in ``~/Library/Messages/chat.db`` (ordered by ``date`` descending), fills
    missing ``text`` values with the decoded ``attributedBody`` contents, and
    prints the resulting ``text`` column.
    """
    # Local import so this function is self-contained
    from contextlib import closing

    db_path = os.path.expanduser("~/Library/Messages/chat.db")
    # sqlite3's own context manager only commits/rolls back the transaction;
    # closing() is what actually releases the connection when we are done.
    with closing(sqlite3.connect(db_path)) as connection:
        messages_df = pd.read_sql_query(
            sql="SELECT text, attributedBody FROM message ORDER BY date DESC",
            con=connection,
            # NOTE(review): the query selects no "datetime" column, so this
            # entry appears to have no effect -- confirm before relying on it.
            parse_dates={"datetime": "ISO8601"},
        )
    # The frame is fully loaded at this point, so the remaining work does not
    # need the database connection.
    # Decode any attributedBody values and merge them into the 'text' column
    messages_df["text"] = messages_df["text"].fillna(
        messages_df["attributedBody"].apply(decode_message_attributedbody)
    )
    print(messages_df["text"])


# Call main() only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()

Source Link

answered Jan 18, 2024 at 5:14

caleb531

import os
import os.path
import sqlite3

import pandas as pd
from typedstream.stream import TypedStreamReader


# The textual contents of some messages are encoded in a special attributedBody
# column on the message row; this attributedBody value is in Apple's proprietary
# typedstream format, but can be parsed with the pytypedstream package
# (<https://pypi.org/project/pytypedstream/>)
def decode_message_attributedbody(data):
    """Extract the plain-text message from an ``attributedBody`` blob.

    Args:
        data: Raw typedstream bytes from the ``attributedBody`` column, or a
            falsy value (``None`` / empty bytes) when the row has no blob.

    Returns:
        The first bytes event found in the stream, decoded as UTF-8, or
        ``None`` when ``data`` is falsy or the stream yields no bytes event.
    """
    if not data:
        return None
    for event in TypedStreamReader.from_data(data):
        # The first bytes object is the one we want
        if isinstance(event, bytes):
            return event.decode("utf-8")
    # No bytes event in the stream: make the "nothing to decode" result explicit
    return None


def main():
    """Print the text of every message in the local Messages database.

    Reads the ``text`` and ``attributedBody`` columns of the ``message`` table
    in ``~/Library/Messages/chat.db`` (ordered by ``date`` descending), fills
    missing ``text`` values with the decoded ``attributedBody`` contents, and
    prints the resulting ``text`` column.
    """
    # Local import so this function is self-contained
    from contextlib import closing

    db_path = os.path.expanduser("~/Library/Messages/chat.db")
    # sqlite3's own context manager only commits/rolls back the transaction;
    # closing() is what actually releases the connection when we are done.
    with closing(sqlite3.connect(db_path)) as connection:
        messages_df = pd.read_sql_query(
            sql="SELECT text, attributedBody FROM message ORDER BY date DESC",
            con=connection,
            # NOTE(review): the query selects no "datetime" column, so this
            # entry appears to have no effect -- confirm before relying on it.
            parse_dates={"datetime": "ISO8601"},
        )
    # The frame is fully loaded at this point, so the remaining work does not
    # need the database connection.
    # Decode any attributedBody values and merge them into the 'text' column
    messages_df["text"] = messages_df["text"].fillna(
        messages_df["attributedBody"].apply(decode_message_attributedbody)
    )
    print(messages_df["text"])


# Call main() only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()