const fs = require('fs');
const readline = require('readline');
/**
 * Two-level parser: level 0 lists the files in a directory and enqueues one
 * query per file; deeper levels read such a file line by line and run every
 * non-empty line (treated as a URL) through the 'HTML::ArticleExtractor'
 * parser.
 *
 * `this.logger`, `this.query`, `this.parser` and `this.conf` are not defined
 * in this class — presumably they are injected by the host framework.
 */
class Parser {
    constructor() {
        // Default settings and result layout exposed by this parser.
        this.defaultConf = {
            version: '0.1.24',
            results: {
                flat: [
                    ['Content', 'Content'],
                ],
            },
            results_format: '$p1.textContent.collapse',
            ArticleExtractor_preset: 'default',
        };
        // Settings declared as editable: [confKey, [widgetType, label]].
        this.editableConf = [
            ['ArticleExtractor_preset', ['combobox', 'HTML::ArticleExtractor preset']],
        ];
    }

    /**
     * Process one query.
     *
     * @param {object} set - Current query. Reads `set.lvl`, `set.query` and
     *   `set.filename`. At level 0 `set.query` is a directory path; at deeper
     *   levels it is the path of a file enqueued by level 0.
     * @param {object} results - Result object, mutated in place and returned.
     * @returns {object} The same `results` object.
     * @throws At level 0, whatever `fs.readdirSync` throws (e.g. the
     *   directory does not exist). Errors at deeper levels are logged and
     *   reported through `results.success = 0`.
     */
    *parse(set, results) {
        if (set.lvl === 0) {
            // The directory itself yields no content; it only fans out into
            // one sub-query per directory entry.
            results.SKIP = 1;
            results.success = 1;
            const files = fs.readdirSync(set.query);
            this.logger.put(`Found ${files.length} files`);
            for (const filename of files) {
                this.query.add({ query: `${set.query}/${filename}`, filename });
            }
            return results;
        }

        try {
            this.logger.put(`Current file ${set.filename} `);
            // `set.query` holds the full path built at level 0, whereas
            // `set.filename` is only the bare directory entry name.
            // The file is read synchronously because `yield` is only legal
            // directly inside the generator body, not in an event callback.
            const lines = fs.readFileSync(set.query, 'utf8').split(/\r?\n/);
            for (const line of lines) {
                const currUrl = line.trim();
                if (currUrl === '') {
                    continue;
                }
                this.logger.put(`Current URL ${currUrl}`);
                const resp = yield this.parser.request(
                    'HTML::ArticleExtractor',
                    this.conf.ArticleExtractor_preset,
                    {},
                    currUrl
                );
                // NOTE(review): `Content` is a single flat field, so when a
                // file lists several URLs the last successful one wins —
                // confirm this is intended. Also confirm that the response
                // really exposes `textContent.collapse` as a property.
                if (resp.info.success) {
                    results.Content = resp.textContent.collapse;
                }
                results.success = resp.info.success;
            }
        } catch (e) {
            this.logger.put(`Error: ${e.message}`);
            results.success = 0;
        }
        return results;
    }
}