This script just queries the “search” page via the API and writes the results to a CSV file. The code is pretty bad, so beware…
One could extend it to read the entire JSON data, it comes from here: https://www.lingq.com/api/v3/pt/search/
If you need more info, e.g. from inside the courses, you could do something like: Export your lingq text content as a tree of files to Google drive - #10 by bamboozled. But it’s not super interesting, and it’s slow on top of that.
Hope it helps. As a librarian I sometimes like to see which courses don’t have tags, or just get general statistics, e.g. how many courses and lessons a language has, or how many total hours of audio.
Summary
import csv
import requests
import os
from dataclasses import dataclass
from typing import Optional, Dict, Any, List
from dotenv import load_dotenv
load_dotenv()  # pull variables from a local .env file into the environment
API_KEY = os.getenv("API_KEY")  # LingQ API token; None if missing — TODO confirm the API rejects that cleanly
language_code = "pt"  # language slug used in the API URL (Portuguese)
# Default headers applied to every request; the token authenticates the account.
headers = {
    "Authorization": f"Token {API_KEY}",
    "Accept": "application/json",
    "Accept-Encoding": "gzip, deflate, br",
}
# Query parameters for the /search/ endpoint: all six levels, first page,
# up to 1000 results, courses ("collection") sorted by most liked.
library_params = {
    "level": [1, 2, 3, 4, 5, 6],
    "page": 1,
    "page_size": 1000,
    "type": "collection",
    "sortBy": "mostLiked",
}
def init_session(headers: Dict[str, str] = headers) -> requests.Session:
    """Return a requests.Session pre-loaded with the given default headers.

    With no argument, the module-level ``headers`` dict (auth token +
    content negotiation) is applied, so every request made through the
    session is authenticated.
    """
    configured = requests.Session()
    configured.headers.update(headers)
    return configured
def fetch_data(
    session: requests.Session,
    url: str,
    params: Dict[str, Any],
    timeout: float = 30.0,
) -> Optional[Dict]:
    """GET *url* with *params* and return the decoded JSON payload.

    Args:
        session: Session carrying the auth headers (see init_session).
        url: Endpoint to query.
        params: Query-string parameters.
        timeout: Seconds before the request is aborted. requests has NO
            default timeout, so without this an unresponsive server would
            hang the script forever.

    Returns:
        The parsed JSON dict on HTTP 200, otherwise None (the failing
        status code is printed for diagnosis).
    """
    response = session.get(url, params=params, timeout=timeout)
    if response.status_code == 200:
        return response.json()
    print(f"Failed to fetch data. Status code: {response.status_code}")
    return None
@dataclass
class Collection:
    """One search result (a course, API type "collection") from the LingQ library.

    Field names mirror the JSON keys verbatim, hence the camelCase. Every
    field defaults to None at class level: the hand-written ``__init__``
    below bypasses the dataclass-generated one and only stores whatever
    keys the payload actually contains, so without these defaults a result
    missing a declared key would raise AttributeError later (e.g. in
    make_csv or repr). The defaults are the fallback for absent keys.
    """

    id: Optional[int] = None
    type: Optional[str] = None
    title: Optional[str] = None
    status: Optional[str] = None
    sharedByName: Optional[str] = None
    sharedByRole: Optional[str] = None
    lessonsCount: Optional[int] = None
    rosesCount: Optional[int] = None
    viewsCount: Optional[int] = None
    duration: Optional[int] = None
    level: Optional[str] = None
    date: Optional[str] = None
    tags: Optional[List[str]] = None
    url: Optional[str] = None

    def __init__(self, *args, **kwargs):
        # Deliberately permissive: the API payload carries more keys than
        # we declare, and all of them are kept. Declared fields absent
        # from the payload fall back to the class-level None defaults.
        self.__dict__.update(kwargs)
def fetch_language():
    """Fetch the search results for *language_code* and write them to CSV.

    Orchestration only: builds an authenticated session, hits the
    /search/ endpoint with the module-level library_params, and hands the
    parsed results to make_csv. Prints a message and does nothing else on
    failure.
    """
    session = init_session()
    search_url = f"https://www.lingq.com/api/v3/{language_code}/search/"
    library_data = fetch_data(session, search_url, library_params)
    if library_data is None:
        print("Failed to fetch library data.")
        return
    make_csv([Collection(**item) for item in library_data["results"]])
def make_csv(collections, filename: str = "collections.csv"):
    """Write *collections* to *filename* as a CSV table.

    Args:
        collections: Iterable of Collection objects (anything exposing the
            attributes read below works).
        filename: Output path; defaults to the original hard-coded name so
            existing callers are unaffected.
    """
    header = [
        "ID",
        "Type",
        "Title",
        "Status",
        "Shared By",
        "Role",
        "Lessons",
        "Roses",
        "Duration",
        "Level",
        "Date",
        "Tags",
        "URL",
    ]
    # encoding is explicit: titles are Portuguese, and the platform default
    # codec (e.g. cp1252 on Windows) would raise UnicodeEncodeError.
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(header)
        for collection in collections:
            # Join tags so the cell reads "a, b" rather than a Python
            # list repr like "['a', 'b']"; empty/None tags become "".
            tags = ", ".join(collection.tags) if collection.tags else ""
            writer.writerow(
                [
                    collection.id,
                    collection.type,
                    collection.title,
                    collection.status,
                    collection.sharedByName,
                    collection.sharedByRole,
                    collection.lessonsCount,
                    collection.rosesCount,
                    collection.duration,
                    collection.level,
                    collection.date,
                    tags,
                    collection.url,
                ]
            )
if __name__ == "__main__":
    # Run only when executed as a script, not when imported as a module.
    fetch_language()