--- source-trees/gt2-cvs/gram/jobmanager/source/globus_gram_job_manager_script.c Fri Sep 28 10:29:41 2007 +++ source-trees/gt2-cvs/gram/jobmanager/source/globus_gram_job_manager_script.c.new Tue Oct 2 13:52:44 2007 @@ -584,9 +584,12 @@ globus_gram_job_manager_script_poll_fast( globus_gram_jobmanager_request_t * request) { + int i; char * grid_monitor_output = 0; + char * grid_monitor_files[3] = { NULL, NULL, NULL }; /* Path is $GLOBUS_LOCATION/GRID_MONITOR_LOCATION$UID */ - char * GRID_MONITOR_LOCATION = "/tmp/grid_manager_monitor_agent_log."; + const char * GRID_MONITOR_LOCATION_1 = "/tmp/grid_manager_monitor_agent_log."; + const char * GRID_MONITOR_LOCATION_2 = "/tmp/gram_job_state/grid_manager_monitor_agent_log."; const char * WHITESPACE = " \t"; uid_t this_uid = geteuid(); struct stat stat_results; @@ -611,48 +614,76 @@ goto FAST_POLL_EXIT_FAILURE; } - grid_monitor_output = globus_libc_malloc( + /* The grid monitor's job status file can be in one of two places. + * We want to check both. + */ + grid_monitor_files[0] = globus_libc_malloc( strlen(request->globus_location) + - strlen(GRID_MONITOR_LOCATION) + 10); - if( ! grid_monitor_output) + strlen(GRID_MONITOR_LOCATION_1) + 10); + if( ! grid_monitor_files[0]) goto FAST_POLL_EXIT_FAILURE; - sprintf(grid_monitor_output, "%s%s%d", + sprintf(grid_monitor_files[0], "%s%s%d", request->globus_location, - GRID_MONITOR_LOCATION, + GRID_MONITOR_LOCATION_1, (int)this_uid); - grid_monitor_file = fopen(grid_monitor_output, "r"); - - if( ! grid_monitor_file ) - { - /* No monitor file? That's acceptable, silently fail */ + grid_monitor_files[1] = globus_libc_malloc( + strlen(request->globus_location) + + strlen(GRID_MONITOR_LOCATION_2) + 10); + if( ! grid_monitor_files[1]) goto FAST_POLL_EXIT_FAILURE; - } - rc = stat(grid_monitor_output, &stat_results); + sprintf(grid_monitor_files[1], "%s%s%d", + request->globus_location, + GRID_MONITOR_LOCATION_2, + (int)this_uid); + for ( i = 0; grid_monitor_files[i]; i++ ) { + grid_monitor_output = grid_monitor_files[i]; - if( rc != 0 ) - goto FAST_POLL_EXIT_FAILURE; + grid_monitor_file = fopen(grid_monitor_output, "r"); - if(stat_results.st_uid != this_uid || - !S_ISREG(stat_results.st_mode) - /* TODO: test for world writable (bad)? Is such a test logical on AFS? */ - ) - { - globus_gram_job_manager_request_log(request, - "JMI: poll_fast: Monitoring file looks untrustworthy. " - "Reverting to normal polling\n"); - goto FAST_POLL_EXIT_FAILURE; - } + if( ! grid_monitor_file ) + { + /* No monitor file? That's acceptable, silently fail */ + continue; + } + + rc = stat(grid_monitor_output, &stat_results); + if( rc != 0 ) { + fclose(grid_monitor_file); + grid_monitor_file = NULL; + continue; + } + + if(stat_results.st_uid != this_uid || + !S_ISREG(stat_results.st_mode) + /* TODO: test for world writable (bad)? Is such a test logical on AFS? */ + ) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Monitoring file %s looks untrustworthy.\n", + grid_monitor_output); + fclose(grid_monitor_file); + grid_monitor_file = NULL; + continue; + } + + if( (stat_results.st_mtime + MAX_MONITOR_FILE_AGE) < time(NULL) ) + { + globus_gram_job_manager_request_log(request, + "JMI: poll_fast: Monitoring file %s looks out of date.\n", + grid_monitor_output); + fclose(grid_monitor_file); + grid_monitor_file = NULL; + continue; + } - if( (stat_results.st_mtime + MAX_MONITOR_FILE_AGE) < time(NULL) ) - { - globus_gram_job_manager_request_log(request, - "JMI: poll_fast: Monitoring file looks out of date. " - "Reverting to normal polling\n"); - goto FAST_POLL_EXIT_FAILURE; + break; + } + if ( grid_monitor_file == NULL ) { + goto FAST_POLL_EXIT_FAILURE; } /* If we got this far, we've decided we trust the file */ @@ -799,8 +830,9 @@ FAST_POLL_EXIT: if(grid_monitor_file) fclose(grid_monitor_file); - if( grid_monitor_output ) - globus_libc_free(grid_monitor_output); + for ( i = 0; grid_monitor_files[i]; i++ ) { + globus_libc_free(grid_monitor_files[i]); + } if( job_contact_match ) globus_libc_free(job_contact_match);