/**
* @module Infrastructure/SitemapXml/Scanners/News
* @category Intelligence Operations / Supporting Infrastructure
* @name News article scanner — base-slug grouped
*
* @description
* Walks `news/` recursively and groups files by their base slug (without
* the language suffix) so each article becomes a single `ArticleGroup`
* with a list of available languages and a unified `lastmod`. The unified
* `lastmod` is the **maximum** git timestamp across the article's
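* language variants. Groups are returned in descending `lastmod` order
* (string comparison); ties are broken by the larger `horizonDays` of the
* article type looked up from the part of the slug after its date prefix.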
*
* Round-6 split: extracted from `scripts/generate-sitemap.ts`.
*
* @author Hack23 AB (Infrastructure Team)
* @license Apache-2.0
*/
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { getFileModTime } from '../git-timestamps.js';
import { getBySubfolder } from '../../render-lib/article-types.js';
// ES modules have no built-in `__filename` / `__dirname`; derive both from
// this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Directory scanned by `getNewsArticles`: the `news` folder three levels
// above the directory containing this module.
const NEWS_DIR = path.join(__dirname, '..', '..', '..', 'news');
/**
 * Grouped article descriptor: one entry per base slug across all languages.
 */
export interface ArticleGroup {
/**
 * File name without its `-<lang>.html` suffix, prefixed with the file's
 * directory relative to the news root (always `/`-separated).
 */
baseSlug: string;
/** Language codes of the variants found for this article, in scan order. */
languages: string[];
/**
 * Modification timestamp string as returned by `getFileModTime`; the latest
 * value seen across the article's language variants.
 */
lastmod: string;
}
/**
 * Collect news articles from `NEWS_DIR`, grouped by base slug.
 *
 * The directory is walked recursively, so date-based layouts such as
 * `news/{year}/{month}/article-en.html` are supported. Only files named
 * `<slug>-<lang>.html` with a recognised language code are considered;
 * `index.html` and `index_*` files are skipped. All language variants of the
 * same slug (in the same directory) are merged into one `ArticleGroup` whose
 * `lastmod` is the latest modification time among its variants.
 *
 * @returns The article groups ordered by `lastmod` descending (string
 *   comparison via `localeCompare`); groups with equal `lastmod` are ordered
 *   by descending `horizonDays` of the entry `getBySubfolder` returns for the
 *   slug part following the `YYYY-MM-DD-` prefix (0 when there is no entry).
 *   Returns an empty array, after logging a warning, when `NEWS_DIR` does not
 *   exist.
 */
export function getNewsArticles(): ArticleGroup[] {
  // Compiled once per call instead of once per file / per comparison.
  // Group 1 is the slug, group 2 the language code.
  const languageVariantPattern = /^(.+?)-(en|sv|da|no|fi|de|fr|es|nl|ar|he|ja|ko|zh)\.html$/;
  // Captures whatever follows a `YYYY-MM-DD-` date prefix inside a base slug.
  const datedSlugPattern = /\d{4}-\d{2}-\d{2}-(.+)/;

  console.log('📰 Scanning news directory...');
  if (!fs.existsSync(NEWS_DIR)) {
    console.warn('⚠️ News directory not found');
    return [];
  }

  const articles = new Map<string, ArticleGroup>();

  /** Recursively scan `dir`, registering every language variant found. */
  function scanDir(dir: string): void {
    // Sort entries by name so the traversal (and therefore the order of each
    // group's `languages`) does not depend on filesystem enumeration order.
    const entries = fs
      .readdirSync(dir, { withFileTypes: true })
      .sort((a, b) => a.name.localeCompare(b.name));

    for (const entry of entries) {
      if (entry.isDirectory()) {
        scanDir(path.join(dir, entry.name));
        continue;
      }

      const file = entry.name;
      const isArticleCandidate =
        entry.isFile() &&
        file !== 'index.html' &&
        !file.startsWith('index_') &&
        file.endsWith('.html');
      if (!isArticleCandidate) continue;

      // Both capture groups are non-empty whenever the pattern matches, so
      // this guard only rejects file names that are not language variants.
      const [, slug, lang] = languageVariantPattern.exec(file) ?? [];
      if (!slug || !lang) continue;

      const fileModTime = getFileModTime(path.join(dir, file));

      // Key groups by their path relative to the news root, always using
      // forward slashes regardless of the platform separator.
      const relDir = path.relative(NEWS_DIR, dir).split(path.sep).join('/');
      const fullBaseSlug = relDir ? `${relDir}/${slug}` : slug;

      const existing = articles.get(fullBaseSlug);
      if (existing === undefined) {
        articles.set(fullBaseSlug, {
          baseSlug: fullBaseSlug,
          languages: [lang],
          lastmod: fileModTime,
        });
        continue;
      }

      // Keep the most recent timestamp across all language variants.
      if (!existing.lastmod || new Date(fileModTime) > new Date(existing.lastmod)) {
        existing.lastmod = fileModTime;
      }
      existing.languages.push(lang);
    }
  }

  scanDir(NEWS_DIR);
  console.log(` Found ${articles.size} news article groups`);

  /** `horizonDays` of the article type derived from a base slug, or 0. */
  const horizonOf = (baseSlug: string): number => {
    const subfolder = datedSlugPattern.exec(baseSlug)?.[1] ?? '';
    return getBySubfolder(subfolder)?.horizonDays ?? 0;
  };

  return Array.from(articles.values()).sort((a, b) => {
    // Primary key: `lastmod`, descending.
    const dateCmp = b.lastmod.localeCompare(a.lastmod);
    if (dateCmp !== 0) return dateCmp;
    // Tie-break: larger horizon first.
    return horizonOf(b.baseSlug) - horizonOf(a.baseSlug);
  });
}