<?php
/**
 * Xfinity Cable Modem Stats Gatherer.
 *
 * This will gather stats about your Xfinity cable modem, at least for model CGM4140COM.
 * It will output its data in Influx line format for ingestion into Influx.
 *
 * You'll need to install php-cli, php-curl, php-xml
 *
 * Requires PHP 8.0 or newer (the script relies on str_contains()).
 */

// If debugging locally, set to true.
// Guarded so that a DEBUG constant defined before this file loads is respected
// instead of triggering an "already defined" warning.
if ( ! defined( 'DEBUG' ) ) {
    define( 'DEBUG', false );
}

if ( DEBUG ) {
    ini_set( 'display_errors', '1' );
    ini_set( 'display_startup_errors', '1' );
    error_reporting( E_ALL );
}

/**
 * Xfinity Modem Stats Class
 *
 * Logs in to the modem's admin UI, downloads the network status page, extracts
 * the channel tables from it and renders them as Influx line protocol.
 */
class Xfinity_Modem_Stats {

    /**
     * Markup the modem returns instead of the stats page when the session is not logged in.
     */
    private const LOGIN_REQUIRED_MARKER = '<script type="text/javascript">alertLoc("Please Login First!"); location.href="home_loggedout.jst";</script>';

    /**
     * Modem URL
     *
     * @var string
     */
    public static $modem_url = 'http://10.0.0.1';

    /**
     * Login Page
     *
     * @var string
     */
    public static $login_page = '/check.jst';

    /**
     * Admin username
     *
     * @var string
     */
    public static $username = 'admin';

    /**
     * Admin password
     *
     * @var string
     */
    public static $password = 'hunter2';

    /**
     * Stats Data page
     *
     * @var string
     */
    public static $data_page = '/network_setup.jst';

    /**
     * Tagpass Tag
     *
     * @var string
     */
    public static $tag = 'xfinity_modem';

    /**
     * Grabs an authentication cookie.
     *
     * A cookie cached by a previous run is reused for up to five minutes.
     * Otherwise the credentials are POSTed to the login page and the DUKSID
     * cookie is read from the response headers.
     *
     * @return string|false Auth cookie value, or false when the request fails
     *                      or the response carries no DUKSID cookie.
     */
    public static function authenticate() {
        $login_cookie = Cache::get( 'login_cookie' );
        if ( $login_cookie ) {
            return $login_cookie;
        }

        $ch = curl_init( self::$modem_url . self::$login_page );
        if ( false === $ch ) {
            return false;
        }

        // http_build_query() URL-encodes the values, so credentials containing
        // characters such as "&" or "=" no longer corrupt the request body.
        $data = http_build_query(
            array(
                'username' => self::$username,
                'password' => self::$password,
                'locale'   => 'false',
            )
        );

        curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
        curl_setopt( $ch, CURLOPT_POST, true );
        curl_setopt( $ch, CURLOPT_POSTFIELDS, $data );
        curl_setopt( $ch, CURLOPT_HEADER, true ); // Get headers in the response.

        // No curl_close(): it has no effect on PHP 8.0+, the handle is released
        // when $ch goes out of scope.
        $response = curl_exec( $ch );
        if ( ! is_string( $response ) ) {
            return false;
        }

        // Extract cookie from response. Header names are case-insensitive.
        if ( 1 !== preg_match( '/Set-Cookie: DUKSID=(.*?);/i', $response, $matches ) || '' === $matches[1] ) {
            return false;
        }

        Cache::set( 'login_cookie', $matches[1], 'default', 60 * 5 ); // 5 minutes.

        return $matches[1];
    }

    /**
     * Gets modem stats HTML page.
     *
     * The page is cached (5 seconds when DEBUG is on, 60 seconds otherwise).
     * Failed downloads and the "please log in" page are never cached. When the
     * modem rejects the session, both the cached cookie and any cached page are
     * dropped and the script terminates.
     *
     * @param string|false $login_cookie Auth cookie value.
     *
     * @return string|false HTML page for stats, or false without a cookie or
     *                      when the download fails.
     */
    public static function get_modem_stats_html( $login_cookie = false ) {
        if ( ! $login_cookie ) {
            return false;
        }

        $html       = Cache::get( 'modem_stats_html' );
        $from_cache = (bool) $html;

        if ( ! $from_cache ) {
            $ch = curl_init( self::$modem_url . self::$data_page );
            if ( false === $ch ) {
                return false;
            }

            curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
            curl_setopt( $ch, CURLOPT_HTTPHEADER, array( sprintf( 'Cookie: DUKSID=%s', $login_cookie ) ) );
            $html = curl_exec( $ch );

            if ( ! is_string( $html ) || '' === $html ) {
                return false;
            }
        }

        if ( str_contains( $html, self::LOGIN_REQUIRED_MARKER ) ) {
            Cache::delete( 'login_cookie' );
            Cache::delete( 'modem_stats_html' );
            die( 'Login Unsuccesful!' );
        }

        if ( ! $from_cache ) {
            // DEBUG is always defined, so its value (not its existence) selects the TTL.
            $ttl = ( defined( 'DEBUG' ) && DEBUG ) ? 5 : 60;
            Cache::set( 'modem_stats_html', $html, 'default', $ttl );
        }

        return $html;
    }

    /**
     * Parse the HTML and get the CM Channel stats
     *
     * @param string|false $html Stats Page HTML.
     *
     * @return string|false JSON containing channel stats, or false when no HTML was supplied.
     *
     * @throws Exception When the table contains a data row that is not expected.
     */
    public static function parse_cm_stats( $html = false ) {
        $to_int = static fn( $value ) => (int) $value;

        return self::parse_table(
            $html,
            'CM Error Codewords',
            array(),
            array(
                1 => $to_int, // Channel ID.
                2 => $to_int, // Unerrored Codewords.
                3 => $to_int, // Correctable Codewords.
                4 => $to_int, // Uncorrectable Codewords.
            )
        );
    }

    /**
     * Parse the HTML and get the Downstream Bonding stats
     *
     * @param string|false $html Stats Page HTML.
     *
     * @return string|false JSON containing channel stats, or false when no HTML was supplied.
     *
     * @throws Exception When the table contains a data row that is not expected.
     */
    public static function parse_downstream_bonding( $html = false ) {
        return self::parse_table(
            $html,
            'Channel Bonding Value',
            array( 'Downstream' ),
            array(
                1 => static fn( $value ) => (int) $value, // Channel ID.
                // Lock Status is a string, so may not be compatible with Influx.
                // Replace null with a converter if you want it anyway.
                2 => null,
                3 => static fn( $value ) => self::frequency_to_mhz( $value ), // Frequency.
                4 => static fn( $value ) => self::strip_unit( $value, 'dB' ), // SNR.
                5 => static fn( $value ) => self::strip_unit( $value, 'dBmV' ), // Power Level.
                6 => null, // Modulation (string, skipped like Lock Status).
            )
        );
    }

    /**
     * Parse the HTML and get the Upstream Bonding stats
     *
     * @param string|false $html Stats Page HTML.
     *
     * @return string|false JSON containing channel stats, or false when no HTML was supplied.
     *
     * @throws Exception When the table contains a data row that is not expected.
     */
    public static function parse_upstream_bonding( $html = false ) {
        return self::parse_table(
            $html,
            'Channel Bonding Value',
            array( 'Upstream' ),
            array(
                1 => static fn( $value ) => (int) $value, // Channel ID.
                // Lock Status is a string, so may not be compatible with Influx.
                // Replace null with a converter if you want it anyway.
                2 => null,
                3 => static fn( $value ) => self::frequency_to_mhz( $value ), // Frequency.
                4 => static fn( $value ) => trim( $value ), // Symbol Rate.
                5 => static fn( $value ) => self::strip_unit( $value, 'dBmV' ), // Power Level.
                6 => null, // Modulation (string, skipped like Lock Status).
                7 => null, // Channel Type (string, skipped like Lock Status).
            )
        );
    }

    /**
     * Shared table walker behind the three parse_* methods.
     *
     * Picks the last table whose text contains $title and every string in
     * $extra_text, collects the trimmed text of its <th> cells as field names,
     * then converts the <td> cells of each data row. Row N is stored under the
     * (N - 1)th field name.
     *
     * @param string|false $html       Stats Page HTML.
     * @param string       $title      Table title; rows containing it are skipped.
     * @param string[]     $extra_text Additional strings the table text must contain.
     * @param array        $converters Map of row index => callable converting one
     *                                 cell's text, or null to ignore that row.
     *
     * @return string|false Pretty-printed JSON of field name => list of values,
     *                      or false when $html is not a non-empty string.
     *
     * @throws Exception When a row holding <td> cells has no converter entry or no field name.
     */
    private static function parse_table( $html, $title, array $extra_text, array $converters ) {
        // DOMDocument::loadHTML() throws on an empty string in PHP 8.
        if ( ! is_string( $html ) || '' === $html ) {
            return false;
        }

        // Collect parser complaints internally rather than emitting warnings,
        // then restore whatever setting was active before.
        $previous = libxml_use_internal_errors( true );
        $doc      = new DOMDocument();
        $doc->loadHTML( $html );
        libxml_clear_errors();
        libxml_use_internal_errors( $previous );

        $required     = array_merge( array( $title ), $extra_text );
        $target_table = null;
        foreach ( $doc->getElementsByTagName( 'table' ) as $table ) {
            $text        = $table->nodeValue;
            $has_all     = true;
            foreach ( $required as $needle ) {
                if ( ! str_contains( $text, $needle ) ) {
                    $has_all = false;
                    break;
                }
            }
            if ( $has_all ) {
                // No break: the last matching table in document order wins.
                $target_table = $table;
            }
        }

        if ( ! $target_table ) {
            Cache::delete( 'modem_stats_html' );
            die( 'Target table not found.' );
        }

        // Extract data from target table.
        $headers = array();
        foreach ( $target_table->getElementsByTagName( 'th' ) as $header ) {
            $headers[] = trim( $header->nodeValue );
        }

        $data = array();
        foreach ( $target_table->getElementsByTagName( 'tr' ) as $row_index => $row ) {
            // We don't need the title row again.
            if ( str_contains( $row->nodeValue, $title ) ) {
                continue;
            }

            $cells = $row->getElementsByTagName( 'td' );
            if ( 0 === $cells->length ) {
                continue;
            }

            if ( ! array_key_exists( $row_index, $converters ) ) {
                if ( defined( 'DEBUG' ) && DEBUG ) {
                    var_dump( $row );
                }
                throw new Exception( 'Unexpected Table Row Found.' );
            }

            $convert = $converters[ $row_index ];
            if ( null === $convert ) {
                continue;
            }

            // A data row without a matching <th> label cannot be named.
            if ( ! isset( $headers[ $row_index - 1 ] ) ) {
                throw new Exception( 'Unexpected Table Row Found.' );
            }

            $field = $headers[ $row_index - 1 ];
            foreach ( $cells as $cell ) {
                $data[ $field ][] = $convert( $cell->nodeValue );
            }
        }

        // Convert the filtered table content to JSON.
        return json_encode( $data, JSON_PRETTY_PRINT );
    }

    /**
     * Removes a unit suffix from a cell value and trims the remainder.
     *
     * @param string $value Cell text.
     * @param string $unit  Unit text to remove.
     *
     * @return string Cell text without the unit.
     */
    private static function strip_unit( $value, $unit ) {
        return trim( str_replace( $unit, '', $value ) );
    }

    /**
     * Converts a frequency cell to a number of MHz.
     *
     * Values above 1,000,000 are treated as Hz and divided down. The value is
     * kept as a float so fractional MHz readings are not truncated, and
     * non-numeric text becomes 0.0 instead of raising a TypeError.
     *
     * @param string $value Cell text.
     *
     * @return float Frequency in MHz.
     */
    private static function frequency_to_mhz( $value ) {
        $number = (float) self::strip_unit( $value, 'MHz' );

        return $number > 1000000 ? $number / 1000000 : $number;
    }

    /**
     * Sanitize a header to be compatible with Influx.
     *
     * Lowercases the text, collapses every run of characters other than a-z and
     * 0-9 into a single underscore, and trims underscores from both ends.
     *
     * @param string $str Header to sanitize.
     *
     * @return string Sanitized header.
     */
    public static function sanitize_header( $str ) {
        $str = strtolower( $str );
        $str = preg_replace( '/[^a-z0-9]+/', '_', $str );

        return trim( $str, '_' );
    }

    /**
     * Renders one decoded value as an Influx line protocol field value.
     *
     * @param mixed $value Decoded JSON value.
     *
     * @return string|null Field value text, or null when the value is empty
     *                     and the field should be left out.
     */
    private static function format_field_value( $value ) {
        if ( is_bool( $value ) ) {
            return $value ? 'true' : 'false';
        }

        if ( is_int( $value ) || is_float( $value ) ) {
            return (string) $value;
        }

        if ( ! is_scalar( $value ) ) {
            return null;
        }

        $value = trim( (string) $value );
        if ( '' === $value ) {
            return null;
        }

        if ( is_numeric( $value ) ) {
            return $value;
        }

        // Line protocol string fields are double-quoted with " and \ escaped.
        return '"' . addcslashes( $value, '"\\' ) . '"';
    }

    /**
     * Converts JSON object to Influx Line format.
     *
     * Emits one line per entry of the "Channel ID" list; every other field
     * contributes the value found at the same position. Empty or missing values
     * are left out, and a channel with no remaining fields produces no line.
     *
     * @param string $json  Cable Modem JSON data.
     * @param string $group Group for data.
     *
     * @return string Line format data.
     *
     * @throws Exception When $json does not decode to a non-empty array.
     */
    public static function json_to_influx_line( $json, $group ) {
        $data = is_string( $json ) ? json_decode( $json, true ) : null;

        if ( ! is_array( $data ) || ! $data ) {
            throw new Exception( 'Invalid JSON provided.' );
        }

        $channel_ids = $data['Channel ID'] ?? array();
        if ( ! is_array( $channel_ids ) ) {
            $channel_ids = array();
        }

        $lines = array();
        foreach ( $channel_ids as $index => $channel_id ) {
            $fields = array();
            foreach ( $data as $field => $values ) {
                if ( 'Channel ID' === $field || ! is_array( $values ) || ! array_key_exists( $index, $values ) ) {
                    continue;
                }

                $formatted = self::format_field_value( $values[ $index ] );
                if ( null === $formatted ) {
                    continue;
                }

                $fields[] = sprintf( '%s=%s', self::sanitize_header( $field ), $formatted );
            }

            if ( ! $fields ) {
                continue;
            }

            $lines[] = sprintf( '%s,channel_id=%d,source=%s %s', $group, $channel_id, self::$tag, implode( ',', $fields ) );
        }

        return implode( "\n", $lines );
    }
}

/**
 * Caching Class
 *
 * Minimal file-backed cache: one serialized file per key, grouped into
 * directories under the system temp directory.
 *
 * NOTE(review): entries (including the modem login cookie) are written below
 * sys_get_temp_dir() in directories created with mode 0777 (before umask), and
 * are read back with unserialize(). Confirm that other local users are trusted
 * on the host that runs this script.
 */
class Cache {

    /**
     * Returns the base cache directory.
     *
     * @return string The cache directory.
     */
    private static function get_cache_dir() {
        return sys_get_temp_dir() . '/_php_custom_cache/';
    }

    /**
     * Returns the cache group directory.
     *
     * @param string $group The cache group.
     * @return string The cache group directory.
     */
    private static function get_group_dir( $group = 'default' ) {
        return self::get_cache_dir() . self::sanitize( $group ) . '/';
    }

    /**
     * Returns the cache filename for a given key and group.
     *
     * @param string $key   The cache key.
     * @param string $group The cache group.
     * @return string The cache filename.
     */
    private static function get_cache_filename( $key, $group = 'default' ) {
        return self::get_group_dir( $group ) . self::sanitize( $key ) . '.cache';
    }

    /**
     * Sanitizes names for safe use as file and directory names.
     *
     * Everything except letters, digits, underscore and hyphen is removed.
     *
     * @param string $name The name to sanitize.
     * @return string The sanitized name.
     */
    private static function sanitize( $name ) {
        return preg_replace( '/[^A-Za-z0-9\_\-]/', '', $name );
    }

    /**
     * Retrieves the cache for the given key and group.
     *
     * Expired or unreadable entries are removed and reported as a miss.
     *
     * @param string $key   The cache key.
     * @param string $group The cache group.
     * @param bool   $force Unused; kept so existing callers keep working.
     * @param bool   $found Whether the key was found in the cache. Disambiguates a return of false, a storable value.
     * @return bool|mixed False on failure to retrieve cache or the cache's stored value.
     */
    public static function get( $key, $group = 'default', $force = false, &$found = null ) {
        $found    = false;
        $filename = self::get_cache_filename( $key, $group );

        if ( ! is_file( $filename ) ) {
            return false;
        }

        $raw  = file_get_contents( $filename );
        $data = false === $raw ? false : unserialize( $raw );

        $is_entry = is_array( $data ) && array_key_exists( 'value', $data ) && isset( $data['expire'] );
        if ( $is_entry ) {
            $expire = (int) $data['expire'];
            // An expire of 0 marks an entry that never expires.
            if ( 0 === $expire || $expire > time() ) {
                $found = true;

                return $data['value'];
            }
        }

        unlink( $filename ); // Remove expired or corrupt cache file.

        return false;
    }

    /**
     * Sets or updates the cache for the given key and group.
     *
     * @param string $key    The cache key.
     * @param mixed  $data   The data to store.
     * @param string $group  The cache group.
     * @param int    $expire Lifetime in seconds; 0 keeps the entry until it is deleted.
     * @return bool True on successful set, false on failure.
     */
    public static function set( $key, $data, $group = 'default', $expire = 0 ) {
        $dir = self::get_group_dir( $group );
        // The second is_dir() covers another process creating the directory first.
        if ( ! is_dir( $dir ) && ! mkdir( $dir, 0777, true ) && ! is_dir( $dir ) ) {
            return false;
        }

        $expire  = (int) $expire;
        $payload = array(
            'value'  => $data,
            // Store 0 verbatim so get() can recognise "never expires";
            // time() + 0 would already be stale on the next read.
            'expire' => 0 === $expire ? 0 : time() + $expire,
        );

        $written = file_put_contents( self::get_cache_filename( $key, $group ), serialize( $payload ), LOCK_EX );

        return false !== $written;
    }

    /**
     * Adds a cache for the given key and group, if it does not already exist.
     *
     * @param string $key    The cache key.
     * @param mixed  $data   The data to store.
     * @param string $group  The cache group.
     * @param int    $expire When the cache data should expire, in seconds.
     * @return bool True on successful add, false on failure.
     */
    public static function add( $key, $data, $group = 'default', $expire = 0 ) {
        // Use $found so a stored value of false still counts as existing.
        $found = false;
        self::get( $key, $group, false, $found );

        if ( $found ) {
            return false;
        }

        return self::set( $key, $data, $group, $expire );
    }

    /**
     * Deletes the cache for the given key and group.
     *
     * @param string $key   The cache key.
     * @param string $group The cache group.
     * @return bool True on successful delete, false on failure.
     */
    public static function delete( $key, $group = 'default' ) {
        $filename = self::get_cache_filename( $key, $group );

        return is_file( $filename ) && unlink( $filename );
    }

    /**
     * Deletes all cache in the given group.
     *
     * @param string $group The cache group.
     * @return bool True on successful delete, false on failure.
     */
    public static function delete_group( $group = 'default' ) {
        $dir = self::get_group_dir( $group );
        if ( ! is_dir( $dir ) ) {
            return false;
        }

        // glob() can return false on error; treat that as "nothing to delete".
        $files = glob( $dir . '*' );
        foreach ( $files ? $files : array() as $file ) {
            if ( is_file( $file ) ) {
                unlink( $file );
            }
        }

        return rmdir( $dir );
    }
}

$login_cookie = Xfinity_Modem_Stats::authenticate();
$html         = Xfinity_Modem_Stats::get_modem_stats_html( $login_cookie );

$cm_json = Xfinity_Modem_Stats::parse_cm_stats( $html );
$db_json = Xfinity_Modem_Stats::parse_downstream_bonding( $html );
$ub_json = Xfinity_Modem_Stats::parse_upstream_bonding( $html );

echo Xfinity_Modem_Stats::json_to_influx_line( $cm_json, 'codewords' ) . PHP_EOL;
echo Xfinity_Modem_Stats::json_to_influx_line( $db_json, 'downstream_bonding' ) . PHP_EOL;
echo Xfinity_Modem_Stats::json_to_influx_line( $ub_json, 'upstream_bonding' ) . PHP_EOL;
xfinity_cm_data_scraper.php
Other Posts Not Worth Reading
Hey, You!
Like this kind of garbage? Subscribe for more! I post like once a month or so, unless I find something interesting to write about.