<?php
/**
* Xfinity Cable Modem Stats Gatherer.
*
* This will gather stats about your Xfinity cable modem, at least for model CGM4140COM.
* It will output its data in Influx line format for ingestion into Influx.
*
* You'll need to install php-cli, php-curl, php-xml
*/
// Flip to true while debugging locally; when enabled, every PHP error
// (including start-up errors) is reported and displayed.
define( 'DEBUG', false );
if ( defined( 'DEBUG' ) && DEBUG ) {
    error_reporting( E_ALL );
    ini_set( 'display_startup_errors', '1' );
    ini_set( 'display_errors', '1' );
}
/**
 * Xfinity Modem Stats Class
 *
 * Logs in to the modem's web UI, downloads the network status page, extracts the
 * "CM Error Codewords" and upstream/downstream "Channel Bonding Value" tables and
 * renders them as Influx line protocol. The session cookie and the downloaded page
 * are kept in the file cache between runs.
 */
class Xfinity_Modem_Stats {
    /**
     * Snippet the modem returns instead of the stats page when the session cookie is rejected.
     *
     * @var string
     */
    private const LOGIN_REQUIRED_MARKER = '<script type="text/javascript">alertLoc("Please Login First!"); location.href="home_loggedout.jst";</script>';

    /**
     * Modem URL
     *
     * @var string
     */
    public static $modem_url = 'http://10.0.0.1';

    /**
     * Login Page
     *
     * @var string
     */
    public static $login_page = '/check.jst';

    /**
     * Admin username
     *
     * @var string
     */
    public static $username = 'admin';

    /**
     * Admin password
     *
     * @var string
     */
    public static $password = 'hunter2';

    /**
     * Stats Data page
     *
     * @var string
     */
    public static $data_page = '/network_setup.jst';

    /**
     * Tagpass Tag
     *
     * @var string
     */
    public static $tag = 'xfinity_modem';

    /**
     * Grabs an authentication cookie.
     *
     * A cached cookie is reused when present; otherwise the credentials are posted to the
     * login page and the DUKSID cookie from the response headers is cached for five minutes.
     *
     * @throws RuntimeException When the login request fails or no DUKSID cookie is returned.
     *
     * @return string Auth Cookie value
     */
    public static function authenticate() {
        $login_cookie = Cache::get( 'login_cookie' );
        if ( $login_cookie ) {
            return $login_cookie;
        }

        // http_build_query() URL-encodes the values, so credentials containing
        // "&", "=", "+" or spaces are transmitted intact.
        $data = http_build_query(
            array(
                'username' => self::$username,
                'password' => self::$password,
                'locale'   => 'false',
            ),
            '',
            '&'
        );

        $ch = curl_init( self::$modem_url . self::$login_page );
        curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
        curl_setopt( $ch, CURLOPT_POST, true );
        curl_setopt( $ch, CURLOPT_POSTFIELDS, $data );
        curl_setopt( $ch, CURLOPT_HEADER, true ); // Get headers in the response.
        $response = curl_exec( $ch );

        if ( false === $response ) {
            throw new RuntimeException( 'Modem login request failed: ' . curl_error( $ch ) );
        }

        // Header names are case-insensitive, and the cookie value ends at the first ";" or at the end of the line.
        $matched = preg_match( '/^Set-Cookie:\s*DUKSID=([^;\r\n]*)/mi', $response, $matches );
        if ( 1 !== $matched || '' === $matches[1] ) {
            throw new RuntimeException( 'Modem login did not return a DUKSID session cookie.' );
        }

        $login_cookie = $matches[1];
        Cache::set( 'login_cookie', $login_cookie, 'default', 60 * 5 ); // 5 minutes.

        return $login_cookie;
    }

    /**
     * Gets modem stats HTML page.
     *
     * The page is served from the cache when available. A freshly downloaded page is cached
     * for 5 seconds when DEBUG is enabled and 60 seconds otherwise. If the modem answers with
     * its "Please Login First!" snippet, both the cached cookie and the cached page are dropped
     * and the script terminates.
     *
     * @param string|false $login_cookie Auth cookie value.
     *
     * @return string|false HTML page for stats, or false without a cookie or when the download fails.
     */
    public static function get_modem_stats_html( $login_cookie = false ) {
        if ( ! $login_cookie ) {
            return false;
        }

        $html       = Cache::get( 'modem_stats_html' );
        $downloaded = false;

        if ( ! $html ) {
            $ch = curl_init( self::$modem_url . self::$data_page );
            curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
            curl_setopt( $ch, CURLOPT_HTTPHEADER, array( sprintf( 'Cookie: DUKSID=%s', $login_cookie ) ) );
            $html = curl_exec( $ch );
            curl_close( $ch );

            if ( false === $html ) {
                // Do not cache a failed download.
                return false;
            }
            $downloaded = true;
        }

        if ( str_contains( $html, self::LOGIN_REQUIRED_MARKER ) ) {
            // Drop both entries so the next run logs in again and re-downloads the page.
            Cache::delete( 'login_cookie' );
            Cache::delete( 'modem_stats_html' );
            die( 'Login Unsuccesful!' );
        }

        if ( $downloaded ) {
            // DEBUG is always defined in this script, so its value (not its existence) selects the TTL.
            $ttl = ( defined( 'DEBUG' ) && DEBUG ) ? 5 : 60;
            Cache::set( 'modem_stats_html', $html, 'default', $ttl );
        }

        return $html;
    }

    /**
     * Parse the HTML and get the CM Channel stats
     *
     * Rows 1-4 of the "CM Error Codewords" table are read as integers.
     *
     * @param string|false $html Stats Page HTML.
     *
     * @throws Exception When the table contains a data row that is not expected.
     *
     * @return string|false JSON containing channel stats, or false when $html is false.
     */
    public static function parse_cm_stats( $html = false ) {
        if ( false === $html ) {
            return false;
        }

        $as_int = static fn ( $text ) => (int) $text;

        return self::parse_table(
            $html,
            array( 'CM Error Codewords' ),
            'CM Error Codewords',
            array(
                1 => $as_int, // Channel ID.
                2 => $as_int, // Unerrored Codewords.
                3 => $as_int, // Correctable Codewords.
                4 => $as_int, // Uncorrectable Codewords.
            )
        );
    }

    /**
     * Parse the HTML and get the Downstream Bonding stats
     *
     * String-valued rows (lock status, modulation) are skipped because they may not be
     * compatible with Influx; replace their null converter with a trim to include them.
     *
     * @param string|false $html Stats Page HTML.
     *
     * @throws Exception When the table contains a data row that is not expected.
     *
     * @return string|false JSON containing channel stats, or false when $html is false.
     */
    public static function parse_downstream_bonding( $html = false ) {
        if ( false === $html ) {
            return false;
        }

        return self::parse_table(
            $html,
            array( 'Channel Bonding Value', 'Downstream' ),
            'Channel Bonding Value',
            array(
                1 => static fn ( $text ) => (int) $text,                         // Channel ID.
                2 => null,                                                       // Lock Status.
                3 => static fn ( $text ) => self::frequency_to_mhz( $text ),     // Frequency.
                4 => static fn ( $text ) => self::strip_unit( $text, 'dB' ),     // SNR.
                5 => static fn ( $text ) => self::strip_unit( $text, 'dBmV' ),   // Power Level.
                6 => null,                                                       // Modulation.
            )
        );
    }

    /**
     * Parse the HTML and get the Upstream Bonding stats
     *
     * String-valued rows (lock status, modulation, channel type) are skipped because they may
     * not be compatible with Influx; replace their null converter with a trim to include them.
     *
     * @param string|false $html Stats Page HTML.
     *
     * @throws Exception When the table contains a data row that is not expected.
     *
     * @return string|false JSON containing channel stats, or false when $html is false.
     */
    public static function parse_upstream_bonding( $html = false ) {
        if ( false === $html ) {
            return false;
        }

        return self::parse_table(
            $html,
            array( 'Channel Bonding Value', 'Upstream' ),
            'Channel Bonding Value',
            array(
                1 => static fn ( $text ) => (int) $text,                         // Channel ID.
                2 => null,                                                       // Lock Status.
                3 => static fn ( $text ) => self::frequency_to_mhz( $text ),     // Frequency.
                4 => static fn ( $text ) => trim( $text ),                       // Symbol Rate.
                5 => static fn ( $text ) => self::strip_unit( $text, 'dBmV' ),   // Power Level.
                6 => null,                                                       // Modulation.
                7 => null,                                                       // Channel Type.
            )
        );
    }

    /**
     * Locates one stats table and converts its rows to JSON.
     *
     * The last table whose text contains every marker is used. Its <th> texts are the row
     * labels; the data row at index N is stored under label N - 1 because the title row
     * occupies index 0. When no table matches, the cached page is dropped and the script
     * terminates.
     *
     * @param string   $html          Stats Page HTML.
     * @param string[] $table_markers Texts that must all appear in the wanted table.
     * @param string   $title_marker  Text identifying the title row, which is skipped.
     * @param array    $converters    Row index => callable turning a cell's text into a value,
     *                                or null to ignore that row.
     *
     * @throws Exception When a data row has no converter entry or no matching label.
     *
     * @return string|false JSON object of label => list of values (false if encoding fails).
     */
    private static function parse_table( $html, array $table_markers, $title_marker, array $converters ) {
        $target_table = null;

        // DOMDocument::loadHTML() rejects an empty string, so only parse real content.
        if ( is_string( $html ) && '' !== $html ) {
            $doc = new DOMDocument();

            // Collect libxml's markup warnings internally instead of printing them.
            $previous = libxml_use_internal_errors( true );
            $doc->loadHTML( $html );
            libxml_clear_errors();
            libxml_use_internal_errors( $previous );

            foreach ( $doc->getElementsByTagName( 'table' ) as $table ) {
                $matches_all = true;
                foreach ( $table_markers as $marker ) {
                    if ( ! str_contains( $table->nodeValue, $marker ) ) {
                        $matches_all = false;
                        break;
                    }
                }
                if ( $matches_all ) {
                    $target_table = $table; // No break: the last matching table wins.
                }
            }
        }

        if ( null === $target_table ) {
            Cache::delete( 'modem_stats_html' );
            die( 'Target table not found.' );
        }

        $headers = array();
        foreach ( $target_table->getElementsByTagName( 'th' ) as $header ) {
            $headers[] = trim( $header->nodeValue );
        }

        $data = array();
        foreach ( $target_table->getElementsByTagName( 'tr' ) as $row_index => $row ) {
            // We don't need the title row again.
            if ( str_contains( $row->nodeValue, $title_marker ) ) {
                continue;
            }

            $cells = $row->getElementsByTagName( 'td' );
            if ( 0 === $cells->length ) {
                continue;
            }

            if ( ! array_key_exists( $row_index, $converters ) ) {
                if ( defined( 'DEBUG' ) && DEBUG ) {
                    var_dump( $row );
                }
                throw new Exception( 'Unexpected Table Row Found.' );
            }

            $convert = $converters[ $row_index ];
            if ( null === $convert ) {
                continue; // Row deliberately ignored.
            }

            if ( ! isset( $headers[ $row_index - 1 ] ) ) {
                throw new Exception( 'Unexpected Table Row Found.' );
            }
            $label = $headers[ $row_index - 1 ];

            foreach ( $cells as $cell ) {
                $data[ $label ][] = $convert( $cell->nodeValue );
            }
        }

        return json_encode( $data, JSON_PRETTY_PRINT );
    }

    /**
     * Normalises a frequency cell to MHz.
     *
     * The "MHz" suffix is removed; numeric values above 1,000,000 are treated as Hz and
     * divided down, anything else is cast to an integer.
     *
     * @param string $text Cell text.
     *
     * @return int|float Frequency in MHz.
     */
    private static function frequency_to_mhz( $text ) {
        $value = self::strip_unit( $text, 'MHz' );

        // The is_numeric() guard keeps non-numeric text away from the division.
        if ( is_numeric( $value ) && $value > 1000000 ) {
            return $value / 1000000;
        }

        return (int) $value;
    }

    /**
     * Removes a unit suffix from a cell and trims the remainder.
     *
     * @param string $text Cell text.
     * @param string $unit Unit to remove, e.g. "dBmV".
     *
     * @return string Cell text without the unit.
     */
    private static function strip_unit( $text, $unit ) {
        return trim( str_replace( $unit, '', $text ) );
    }

    /**
     * Sanitize a header to be compatible with Influx.
     *
     * Lowercases the header, collapses every run of characters other than a-z and 0-9 into a
     * single underscore and strips leading/trailing underscores.
     *
     * @param string $str Header to sanitize.
     *
     * @return string Sanitized header.
     */
    public static function sanitize_header( $str ) {
        $str = strtolower( $str );
        $str = preg_replace( '/[^a-z0-9]+/', '_', $str );

        return trim( $str, '_' );
    }

    /**
     * Converts JSON object to Influx Line format.
     *
     * One line is produced per entry of the "Channel ID" list; every other key becomes a field
     * whose value is taken from the same position in its list. Fields without a value at that
     * position are left out, and a channel without any field produces no line.
     *
     * @param string $json  Cable Modem JSON data.
     * @param string $group Group for data.
     *
     * @throws Exception When $json does not decode to a non-empty array.
     *
     * @return string Line format data.
     */
    public static function json_to_influx_line( $json, $group ) {
        $data = is_string( $json ) ? json_decode( $json, true ) : null;
        if ( ! is_array( $data ) || ! $data ) {
            throw new Exception( 'Invalid JSON provided.' );
        }

        $channel_ids = (array) ( $data['Channel ID'] ?? array() );
        $lines       = array();

        foreach ( $channel_ids as $index => $channel_id ) {
            $fields = array();
            foreach ( $data as $field => $values ) {
                if ( 'Channel ID' === $field ) {
                    continue;
                }
                if ( ! is_array( $values ) || ! isset( $values[ $index ] ) ) {
                    continue; // An empty "field=" would make the whole line invalid.
                }
                $fields[] = sprintf( '%s=%s', self::sanitize_header( $field ), $values[ $index ] );
            }

            if ( ! $fields ) {
                continue;
            }

            $lines[] = sprintf(
                '%s,channel_id=%d,source=%s %s',
                $group,
                $channel_id,
                self::$tag,
                implode( ',', $fields )
            );
        }

        return implode( "\n", $lines );
    }
}
/**
 * Caching Class
 *
 * Minimal file-backed key/value cache. Every entry is a serialized array with the keys
 * "value" and "expire" stored at <system temp dir>/_php_custom_cache/<group>/<key>.cache.
 * An "expire" of 0 means the entry never expires.
 */
class Cache {
    /**
     * Returns the base cache directory.
     *
     * @return string The cache directory.
     */
    private static function get_cache_dir() {
        return sys_get_temp_dir() . '/_php_custom_cache/';
    }

    /**
     * Returns the cache group directory.
     *
     * @param string $group The cache group.
     * @return string The cache group directory.
     */
    private static function get_group_dir( $group = 'default' ) {
        return self::get_cache_dir() . self::sanitize( $group ) . '/';
    }

    /**
     * Returns the cache filename for a given key and group.
     *
     * @param string $key   The cache key.
     * @param string $group The cache group.
     * @return string The cache filename.
     */
    private static function get_cache_filename( $key, $group = 'default' ) {
        return self::get_group_dir( $group ) . self::sanitize( $key ) . '.cache';
    }

    /**
     * Sanitizes names for safe use as file and directory names.
     *
     * Every character other than letters, digits, "_" and "-" is removed.
     *
     * @param string $name The name to sanitize.
     * @return string The sanitized name.
     */
    private static function sanitize( $name ) {
        return preg_replace( '/[^A-Za-z0-9\_\-]/', '', $name );
    }

    /**
     * Retrieves the cache for the given key and group.
     *
     * Expired, unreadable or malformed entries are removed and reported as a miss.
     *
     * @param string $key   The cache key.
     * @param string $group The cache group.
     * @param bool   $force Unused by this implementation; kept in the signature for callers that pass it.
     * @param bool   $found Whether the key was found in the cache. Disambiguates a return of false, a storable value.
     * @return bool|mixed False on failure to retrieve cache or the cache's stored value.
     */
    public static function get( $key, $group = 'default', $force = false, &$found = null ) {
        $found    = false;
        $filename = self::get_cache_filename( $key, $group );

        if ( ! file_exists( $filename ) ) {
            return false;
        }

        // NOTE(review): unserialize() can instantiate objects and the cache lives in a shared
        // temp directory; restrict with 'allowed_classes' if only scalars/arrays are stored.
        $raw  = file_get_contents( $filename );
        $data = false === $raw ? false : unserialize( $raw );

        $well_formed = is_array( $data ) && array_key_exists( 'value', $data ) && isset( $data['expire'] );
        if ( ! $well_formed ) {
            unlink( $filename ); // Remove corrupt cache file.
            return false;
        }

        $expires = (int) $data['expire'];
        if ( 0 !== $expires && $expires <= time() ) {
            unlink( $filename ); // Remove expired cache file.
            return false;
        }

        $found = true;
        return $data['value'];
    }

    /**
     * Sets or updates the cache for the given key and group.
     *
     * @param string $key    The cache key.
     * @param mixed  $data   The data to store.
     * @param string $group  The cache group.
     * @param int    $expire Lifetime in seconds; 0 stores the entry without an expiry.
     * @return bool True on successful set, false on failure.
     */
    public static function set( $key, $data, $group = 'default', $expire = 0 ) {
        $dir = self::get_group_dir( $group );

        // The trailing is_dir() covers another process creating the directory in between.
        if ( ! is_dir( $dir ) && ! mkdir( $dir, 0777, true ) && ! is_dir( $dir ) ) {
            return false;
        }

        $lifetime = (int) $expire;
        $payload  = array(
            'value'  => $data,
            // 0 is the "never expires" marker that get() checks for, so it must be stored as-is.
            'expire' => 0 === $lifetime ? 0 : time() + $lifetime,
        );

        $written = file_put_contents( self::get_cache_filename( $key, $group ), serialize( $payload ), LOCK_EX );

        return false !== $written;
    }

    /**
     * Adds a cache for the given key and group, if it does not already exist.
     *
     * Existence is decided by the $found flag of get(), so a stored false counts as existing.
     *
     * @param string $key    The cache key.
     * @param mixed  $data   The data to store.
     * @param string $group  The cache group.
     * @param int    $expire When the cache data should expire, in seconds.
     * @return bool True on successful add, false on failure.
     */
    public static function add( $key, $data, $group = 'default', $expire = 0 ) {
        $found = false;
        static::get( $key, $group, false, $found );

        if ( $found ) {
            return false;
        }

        return static::set( $key, $data, $group, $expire );
    }

    /**
     * Deletes the cache for the given key and group.
     *
     * @param string $key   The cache key.
     * @param string $group The cache group.
     * @return bool True on successful delete, false on failure.
     */
    public static function delete( $key, $group = 'default' ) {
        $filename = self::get_cache_filename( $key, $group );

        return file_exists( $filename ) && unlink( $filename );
    }

    /**
     * Deletes all cache in the given group.
     *
     * @param string $group The cache group.
     * @return bool True on successful delete, false on failure.
     */
    public static function delete_group( $group = 'default' ) {
        $dir = self::get_group_dir( $group );
        if ( ! is_dir( $dir ) ) {
            return false;
        }

        // scandir() also lists dotfiles such as ".cache" (a key that sanitizes to an empty name).
        $entries = scandir( $dir );
        if ( false === $entries ) {
            return false;
        }

        foreach ( $entries as $entry ) {
            $path = $dir . $entry;
            if ( is_file( $path ) ) {
                unlink( $path ); // Delete each file.
            }
        }

        return rmdir( $dir ); // Remove the directory.
    }
}
// Obtain a session cookie and download the stats page.
$login_cookie = Xfinity_Modem_Stats::authenticate();
$html         = Xfinity_Modem_Stats::get_modem_stats_html( $login_cookie );

// Measurement name => parsed JSON. All three parsers run before the first line is echoed.
$measurements = array(
    'codewords'          => Xfinity_Modem_Stats::parse_cm_stats( $html ),
    'downstream_bonding' => Xfinity_Modem_Stats::parse_downstream_bonding( $html ),
    'upstream_bonding'   => Xfinity_Modem_Stats::parse_upstream_bonding( $html ),
);

// Print each measurement as Influx line protocol, one block per measurement.
foreach ( $measurements as $group => $json ) {
    echo Xfinity_Modem_Stats::json_to_influx_line( $json, $group ) . PHP_EOL;
}
xfinity_cm_data_scraper.php
Written by
in
Other Posts Not Worth Reading
Hey, You!
Like this kind of garbage? Subscribe for more! I post like once a month or so, unless I find something interesting to write about.
